blob: a9e8a06f62f46618472ceeb94e91517373ccd56b [file] [log] [blame]
## ---------------------------------------------------------------------------
## Licensed to the Apache Software Foundation (ASF) under one or more
## contributor license agreements. See the NOTICE file distributed with
## this work for additional information regarding copyright ownership.
## The ASF licenses this file to You under the Apache License, Version 2.0
## (the "License"); you may not use this file except in compliance with
## the License. You may obtain a copy of the License at
##
## http://www.apache.org/licenses/LICENSE-2.0
##
## Unless required by applicable law or agreed to in writing, software
## distributed under the License is distributed on an "AS IS" BASIS,
## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
## See the License for the specific language governing permissions and
## limitations under the License.
## ---------------------------------------------------------------------------
# Put the ApacheConAsia讲师信息收集表.xlsx、ApacheConSessions.xlsx、drawing1.xml file in the project root directory
# will be generated in the root directory mapping.csv file
from xml.dom.minidom import parse
from pypinyin import lazy_pinyin
import xml.dom.minidom
import openpyxl
import csv
# First, define a CSV file
head =["collect_row","pic","zh_name","en_name","position","track","title","mail","sessions_row"]
with open("./mapping.csv","a+",encoding="utf-8",newline="") as f:
csvf=csv.writer(f)
csvf.writerow(head)
# Using minidom parser to open XML document
DOMTree = xml.dom.minidom.parse("./drawing1.xml")
# Open the EXECL file
collect=openpyxl.load_workbook("./ApacheConAsia讲师信息收集表.xlsx")
session=openpyxl.load_workbook("./ApacheConSessions.xlsx")
# Getting data in a collection
collection = DOMTree.documentElement
twoCellAnchors = collection.getElementsByTagName("xdr"+":"+"twoCellAnchor")
# Take the string between two fixed symbols
def get_str_btw(str,begin,end):
par=str.partition(begin)
return (par[2].partition(end))[0][:]
# Get row and pic
for twoCellAnchor in twoCellAnchors:
# Get row
from1 = twoCellAnchor.getElementsByTagName('xdr'+':'+'to')[0]
row1=from1.getElementsByTagName('xdr'+':'+'row')[0]
row=int(row1.childNodes[0].data)
# Get pic
pic = twoCellAnchor.getElementsByTagName('xdr'+':'+'pic')[0]
blipFill = pic.getElementsByTagName('xdr'+':'+'blipFill')[0]
blip = blipFill.getElementsByTagName('a'+':'+'blip')[0]
if blip.hasAttribute("r"+":"+"embed"):
pic="picture"+(blip.getAttribute("r"+":"+"embed"))[3:]
# Gets the active table object
collect_active=collect.active
session_active=session.active
# Get the name, position, track and email address of the lecturer
zh_name=collect_active.cell(row,3).value
position=collect_active.cell(row,6).value
track=collect_active.cell(row,9).value
mail=collect_active.cell(row,10).value
# Judge whether it is a Chinese name
if '\u4e00' <=zh_name <='\u9fff':
# Translate Chinese name into English name
name_list=lazy_pinyin(zh_name)
xin=name_list[0]
ming_list=name_list[1:]
ming=""
for i in ming_list:
ming=ming+i
en_name=ming.capitalize()+"_"+xin.capitalize()
else:
en_name=zh_name
# According to the mail from the collection table, find the corresponding row from the session table.
# If there is no corresponding row, return 0
flag=False
for cell1 in session_active['J']:
str=cell1.value
mail_list=str.split(",")
for i in mail_list:
session_mail=get_str_btw(i,'<','>')
if session_mail==mail :
flag=True
session_row=cell1.row
title=session_active.cell(cell1.row,7).value
break
if flag==False:
session_row=0
title=collect_active.cell(row,4).value
# Write data to a CSV file
data=[
(row,pic,zh_name,en_name,position,track,title,mail,session_row)
]
with open("./mapping.csv","a+",encoding="utf-8",newline="") as f:
csvf=csv.writer(f)
csvf.writerows(data)