blob: d333aae2847cafe95980417b7ea30bdfaa7004ca [file] [log] [blame]
## ---------------------------------------------------------------------------
## Licensed to the Apache Software Foundation (ASF) under one or more
## contributor license agreements. See the NOTICE file distributed with
## this work for additional information regarding copyright ownership.
## The ASF licenses this file to You under the Apache License, Version 2.0
## (the "License"); you may not use this file except in compliance with
## the License. You may obtain a copy of the License at
##
## http://www.apache.org/licenses/LICENSE-2.0
##
## Unless required by applicable law or agreed to in writing, software
## distributed under the License is distributed on an "AS IS" BASIS,
## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
## See the License for the specific language governing permissions and
## limitations under the License.
## ---------------------------------------------------------------------------
import numpy
import pandas as pd
from youdao_translator_api import translator
# Load the session spreadsheet into a DataFrame. The path is relative, so the
# script has to be run from the directory that contains the workbook. Rows are
# read cell by cell with sessions.at[...] in the export loop further down.
sessions = pd.read_excel('ApacheConSessions.xlsx')
# Maps a track label as it appears in the spreadsheet's "Track" column (either
# the bilingual "English | 中文" form or the English-only form) to the track
# slug. The slug is used as the prefix of the generated file names and as the
# `track` value in the markdown front matter.
tracks_dictionary = {
    "Integration | 集成": "integration",
    "Integration": "integration",
    "Workflow/DataProcessing | 工作流/数据处理": "workflowdatagovernance",
    "Workflow/DataProcessing": "workflowdatagovernance",
    "Web Server/Tomcat | Web服务": "webserverandtomcat",
    "Web Server/Tomcat": "webserverandtomcat",
    "Streaming | 数据流": "streaming",
    "Streaming": "streaming",
    "Observability | 可观测性": "observability",
    "Observability": "observability",
    "Middleware | 中间件": "middleware",
    "Middleware": "middleware",
    "Messaging (Pulsar/Kafka/RocketMQ/ActiveMQ etc. ) | 消息": "messaging",
    "Messaging (Pulsar/Kafka/RocketMQ/ActiveMQ etc. )": "messaging",
    "IOT and IIOT | 物联网和工业物联网": "iot",
    "IoT and IIoT": "iot",
    "Incubator | 孵化器": "incubator",
    "Incubator": "incubator",
    "Data Visualization | 数据可视化": "datavisualization",
    "Data Visualization": "datavisualization",
    "Culture | 文化": "culture",
    "Culture": "culture",
    "Community | 社区": "community",
    "Community": "community",
    "Big Data | 大数据": "bigdata",
    "Big Data": "bigdata",
    "AI": "ai",
    "AI | 人工智能": "ai",
    "API/MicroService | API/微服务框架": "api",
    "API/Microservice": "api",
    "RPC | 远程过程调用": "rpc",
    "RPC": "rpc",
    # Both spellings of the General track share one slug; the bilingual label
    # used to be mapped to "rpc" by mistake.
    "General | 常规": "general",
    "General": "general",
}
# Label written to the `stype` front-matter field of the English page, keyed
# by the session-language value read from the spreadsheet.
session_types_dictionary = {
    "中文": "Chinese Session",
    "英文": "English Session",
    "Chinese": "Chinese Session",
    "English": "English Session",
}

# Same lookup for the Chinese page.
session_types_chinese_dictionary = {
    "中文": "中文演讲",
    "英文": "英文演讲",
    "English": "英文演讲",
    "Chinese": "中文演讲",
}
# Internal field name -> column header used to address cells of the sessions
# DataFrame.
column_name_transform_dict = {
    # One value per session.
    "translator_type": "翻译语言",
    "type": "演讲语言",
    "track": "Track",
    "title": "演讲题目",
    "status": "审核状态",
    "abstract": "内容摘要",
    # Per-speaker columns: "{}" is filled with the speaker number via
    # str.format() before the lookup.
    "speaker_name": "讲师{}-姓名",
    "speaker_company": "讲师{}-单位名称",
    "speaker_title": "讲师{}-职称",
    "speaker_bios": "讲师{}-简介",
    # Scheduled slot of the session.
    "time_schedule": "日程安排",
}
# Language codes handed to translator() as the source / target language.
trans_cn_code = "zh-CHS"
trans_en_code = "en"

# Only rows whose status cell equals this value are exported.
session_success_status = "审核通过"

# Values of the translator-type cell that select which page is written
# untranslated and which one is produced by translation.
translator_type_zh = "中文"
translator_type_en = "英文"

# Template for one speaker line; it is filled with four positional values.
speaker_data = "{0}: {1}, {2}, {3}"

# Speaker-name cell value that marks an unused speaker slot.
no_speaker_flag = "无"
def write_origin_file(file, file_name, mate_data, abstract, speakers_arr):
    """Write a session page without translating anything, then close ``file``.

    The output is the front matter, the abstract, a "Speakers" heading and one
    image tag plus description line per speaker.

    Args:
        file: Writable text file object. It is closed before this function
            returns, including when one of the writes raises.
        file_name: Output base name; its second ``-``-separated segment is
            used as the stem of the speaker image file names.
        mate_data: Front-matter text, written first.
        abstract: Abstract text, written right after the front matter.
        speakers_arr: Iterable of speakers, each indexable as
            ``[name, company, title, bio]``.
    """
    head_img = '<img src="images/speaker/{}" width="200" />'
    try:
        file.write(mate_data)
        file.write(abstract)
        file.write("\n ### Speakers: \n ")
        for index, speaker in enumerate(speakers_arr):
            # The first speaker's picture is "<stem>.png"; every further one
            # is "<stem>_<n>.png" with n counting from 2.
            head_img_name = file_name.split("-")[1]
            if index > 0:
                head_img_name += "_" + str(index + 1)
            head_img_name += ".png"
            file.write(head_img.format(head_img_name))
            file.write("<br>")
            file.write(speaker_data.format(speaker[0], speaker[1], speaker[2], speaker[3]))
            file.write("\r\n ")
    finally:
        # Release the handle even if a write failed part-way through.
        file.close()
def write_translate_file(file, file_name, mate_data, abstract, speakers_arr, trans_from, trans_to):
    """Write the translated variant of a session page, then close ``file``.

    The front matter is written as given. The abstract and all four fields of
    every speaker are passed through ``translator`` before being written.

    Args:
        file: Writable text file object. It is closed before this function
            returns, including when a write or a translation call raises.
        file_name: Output base name; its second ``-``-separated segment is
            used as the stem of the speaker image file names.
        mate_data: Front-matter text, written first without translation.
        abstract: Abstract text in the source language.
        speakers_arr: Iterable of speakers, each indexable as
            ``[name, company, title, bio]``.
        trans_from: Source language code handed to ``translator``.
        trans_to: Target language code handed to ``translator``.
    """
    head_img = '<img src="images/speaker/{}" width="200" />'
    try:
        file.write(mate_data)
        file.write(translator(abstract, trans_from, trans_to))
        file.write("\n ### Speakers: \n ")
        for index, speaker in enumerate(speakers_arr):
            name = translator(speaker[0], trans_from, trans_to)
            company = translator(speaker[1], trans_from, trans_to)
            title = translator(speaker[2], trans_from, trans_to)
            bios = translator(speaker[3], trans_from, trans_to)
            # The first speaker's picture is "<stem>.png"; every further one
            # is "<stem>_<n>.png" with n counting from 2.
            head_img_name = file_name.split("-")[1]
            if index > 0:
                head_img_name += "_" + str(index + 1)
            head_img_name += ".png"
            file.write(head_img.format(head_img_name))
            file.write("<br>")
            file.write(speaker_data.format(name, company, title, bios))
            file.write("\r\n ")
    finally:
        # Release the handle even if a translation call or a write failed.
        file.close()
# Generate "<track>-<id>.md" (English) and "<track>-<id>.zh.md" (Chinese) for
# every approved session. When translator_type equals translator_type_en the
# English page is written untranslated and the Chinese page is produced by
# translation; translator_type_zh does the reverse.
for index in sessions.index:
    session_status = sessions.at[index, column_name_transform_dict.get("status")]
    if session_status != session_success_status:
        # Only sessions that passed review are exported.
        continue

    translator_type = sessions.at[index, column_name_transform_dict.get("translator_type")]
    session_type = sessions.at[index, column_name_transform_dict.get("type")]
    track_name = sessions.at[index, column_name_transform_dict.get("track")]
    # NOTE: .get() returns None for a label missing from tracks_dictionary;
    # building file_name below then fails with TypeError.
    track_name = tracks_dictionary.get(track_name)
    session_title = sessions.at[index, column_name_transform_dict.get("title")]
    session_abstract = sessions.at[index, column_name_transform_dict.get("abstract")]

    # Collect speakers 1..3, skipping slots whose name cell is empty or holds
    # the "no speaker" marker.
    speakers_arr = []
    for no in range(1, 4):
        speaker_name = sessions.at[index, column_name_transform_dict.get("speaker_name").format(str(no))]
        if pd.isnull(speaker_name) or speaker_name == no_speaker_flag:
            continue
        speaker_company = sessions.at[index, column_name_transform_dict.get("speaker_company").format(str(no))]
        speaker_title = sessions.at[index, column_name_transform_dict.get("speaker_title").format(str(no))]
        speaker_bios = sessions.at[index, column_name_transform_dict.get("speaker_bios").format(str(no))]
        speakers_arr.append([speaker_name, speaker_company, speaker_title, speaker_bios])

    print(session_title)
    # The row label shifted by 1000 is the numeric part of the file name.
    file_name = track_name + "-" + str(index + 1000)

    mate_data = '---\ntitle: "{}"\ndate: "{}" \ntrack: "{}"\npresenters: "{}"\nstype: "{}"\n---\n'

    # An empty schedule cell becomes an empty date in the front matter.
    schedule_time = sessions.at[index, column_name_transform_dict.get("time_schedule")]
    if pd.isnull(schedule_time):
        schedule_time = ""

    # Join the names from the plain list: with no speakers numpy.array([]) is
    # one-dimensional and the former speakers_arr[:, 0] raised IndexError.
    speakers = ",".join(str(speaker[0]) for speaker in speakers_arr)
    # The writers keep receiving a numpy array, as before.
    speakers_arr = numpy.array(speakers_arr)

    session_type_en = session_types_dictionary.get(session_type)
    session_type_cn = session_types_chinese_dictionary.get(session_type)

    # The writers close the file they are given; the with-statement also
    # closes both handles when a writer raises or neither branch runs.
    with open(file_name + ".md", "w+", encoding="utf8") as markdown_en_file, \
            open(file_name + ".zh.md", "w+", encoding="utf8") as markdown_zh_file:
        if translator_type == translator_type_en:
            mate_data_str = mate_data.format(session_title,
                                             schedule_time,
                                             track_name,
                                             speakers,
                                             session_type_en)
            write_origin_file(markdown_en_file, file_name, mate_data_str, session_abstract, speakers_arr)
            # Translate English to Chinese
            session_title_cn = translator(session_title, trans_en_code, trans_cn_code)
            mate_data_str_cn = mate_data.format(session_title_cn,
                                                schedule_time,
                                                track_name,
                                                speakers,
                                                session_type_cn)
            write_translate_file(markdown_zh_file, file_name, mate_data_str_cn, session_abstract,
                                 speakers_arr, trans_en_code, trans_cn_code)
        elif translator_type == translator_type_zh:
            mate_data_str = mate_data.format(session_title,
                                             schedule_time,
                                             track_name,
                                             speakers,
                                             session_type_cn)
            write_origin_file(markdown_zh_file, file_name, mate_data_str, session_abstract, speakers_arr)
            # Translate Chinese to English
            session_title_en = translator(session_title, trans_cn_code, trans_en_code)
            mate_data_str_en = mate_data.format(session_title_en,
                                                schedule_time,
                                                track_name,
                                                speakers,
                                                session_type_en)
            write_translate_file(markdown_en_file, file_name, mate_data_str_en, session_abstract,
                                 speakers_arr, trans_cn_code, trans_en_code)
        else:
            # Neither language matched: both files stay empty, as before, but
            # the situation is now reported instead of passing silently.
            print("Unknown translator type {!r}; nothing written for {}".format(translator_type, file_name))