script/mapping.py - apachecon-acasia - Git at Google

 ## ---------------------------------------------------------------------------
 ## Licensed to the Apache Software Foundation (ASF) under one or more
 ## contributor license agreements.  See the NOTICE file distributed with
 ## this work for additional information regarding copyright ownership.
 ## The ASF licenses this file to You under the Apache License, Version 2.0
 ## (the "License"); you may not use this file except in compliance with
 ## the License.  You may obtain a copy of the License at
 ##
 ##      http://www.apache.org/licenses/LICENSE-2.0
 ##
 ## Unless required by applicable law or agreed to in writing, software
 ## distributed under the License is distributed on an "AS IS" BASIS,
 ## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ## See the License for the specific language governing permissions and
 ## limitations under the License.
 ## ---------------------------------------------------------------------------

 # Place ApacheConAsia讲师信息收集表.xlsx, ApacheConSessions.xlsx and drawing1.xml in the project root directory.
 # Running this script generates mapping.csv in that same directory.

 from xml.dom.minidom import parse
 from pypinyin import lazy_pinyin
 import xml.dom.minidom
 import openpyxl
 import csv


  # First, define a CSV file
 # Column names of mapping.csv, in the order the data rows are written later on.
 head = [
     "collect_row", "pic", "zh_name", "en_name", "position",
     "track", "title", "mail", "sessions_row",
 ]

 # Start mapping.csv afresh with only the header row.  Mode "w" (instead of
 # "a+") truncates a file left over from an earlier run, so re-running the
 # script does not pile another header and duplicate rows onto old output.
 # The data rows are appended to this file further down in the script.
 with open("./mapping.csv", "w", encoding="utf-8", newline="") as f:
     csvf = csv.writer(f)
     csvf.writerow(head)

 # Parse the drawing XML with minidom.
 DOMTree = xml.dom.minidom.parse("./drawing1.xml")

 # Open the Excel workbooks: the speaker information sheet and the session list.
 collect = openpyxl.load_workbook("./ApacheConAsia讲师信息收集表.xlsx")
 session = openpyxl.load_workbook("./ApacheConSessions.xlsx")

 # Collect every <xdr:twoCellAnchor> element below the document root.
 collection = DOMTree.documentElement
 twoCellAnchors = collection.getElementsByTagName("xdr:twoCellAnchor")

 # Take the string between two fixed symbols
 def get_str_btw(str,begin,end):
     """Return the text of ``str`` found between ``begin`` and ``end``.

     The search starts after the first occurrence of ``begin`` and stops at the
     first occurrence of ``end`` that follows it.  If ``begin`` does not occur,
     an empty string is returned; if ``end`` does not occur after ``begin``,
     everything after ``begin`` is returned.
     """
     _, _, after_begin = str.partition(begin)
     between, _, _ = after_begin.partition(end)
     return between

 # Get row and pic
 def _to_en_name(name):
     """Return the ``en_name`` column value for a speaker name.

     A name that passes the CJK range check is transliterated with
     ``lazy_pinyin``: the first syllable is used as the family name and the
     remaining syllables are joined into the given name, giving
     ``Given_Family``.  Any other value is returned unchanged.
     """
     # openpyxl reports an empty cell as None; comparing a non-string with a
     # str raises TypeError, so such values are passed through untouched.
     if not isinstance(name, str):
         return name
     # Lexicographic comparison against the \u4e00-\u9fff range, so in practice
     # the outcome is decided by the first character of the name.
     if not ('\u4e00' <= name <= '\u9fff'):
         return name
     syllables = lazy_pinyin(name)
     family = syllables[0]
     given = "".join(syllables[1:])
     return given.capitalize() + "_" + family.capitalize()


 def _index_session_rows(sheet):
     """Map every e-mail address found in column J of ``sheet`` to its row number.

     Each cell is split on "," and the text between "<" and ">" of every piece
     is taken as the address.  When an address occurs in several rows, the
     last row wins.
     """
     rows_by_mail = {}
     for cell in sheet['J']:
         text = cell.value
         # Cells without text (None for an empty cell) have no .split();
         # skip them instead of failing with AttributeError.
         if not isinstance(text, str):
             continue
         for entry in text.split(","):
             rows_by_mail[get_str_btw(entry, '<', '>')] = cell.row
     return rows_by_mail


 # The active sheets and the mail -> row index do not change between anchors,
 # so they are looked up once instead of once per picture.
 collect_active = collect.active
 session_active = session.active
 session_rows = _index_session_rows(session_active)

 # Append the data rows below the header written earlier in the script; the
 # file is opened once for the whole loop rather than once per row.
 with open("./mapping.csv", "a+", encoding="utf-8", newline="") as f:
     csvf = csv.writer(f)
     for twoCellAnchor in twoCellAnchors:
         # Row number stored in the anchor's <xdr:to> marker.
         anchor_to = twoCellAnchor.getElementsByTagName('xdr:to')[0]
         row = int(anchor_to.getElementsByTagName('xdr:row')[0].childNodes[0].data)

         # Picture name derived from the r:embed relationship id of the blip.
         pic_node = twoCellAnchor.getElementsByTagName('xdr:pic')[0]
         blip_fill = pic_node.getElementsByTagName('xdr:blipFill')[0]
         blip = blip_fill.getElementsByTagName('a:blip')[0]
         # Leave the column empty when there is no r:embed attribute instead
         # of writing the repr of the DOM element into the CSV.
         pic = ""
         if blip.hasAttribute("r:embed"):
             # [3:] drops the first three characters of the id -- presumably
             # the "rId" prefix; TODO confirm against drawing1.xml.
             pic = "picture" + blip.getAttribute("r:embed")[3:]

         # Name, position, track and e-mail address of the lecturer.
         zh_name = collect_active.cell(row, 3).value
         position = collect_active.cell(row, 6).value
         track = collect_active.cell(row, 9).value
         mail = collect_active.cell(row, 10).value

         en_name = _to_en_name(zh_name)

         # Find the session row by e-mail.  Without a match the row is
         # reported as 0 and the title falls back to the collection sheet.
         session_row = session_rows.get(mail, 0)
         if session_row:
             title = session_active.cell(session_row, 7).value
         else:
             title = collect_active.cell(row, 4).value

         csvf.writerow(
             (row, pic, zh_name, en_name, position, track, title, mail, session_row)
         )
	## ---------------------------------------------------------------------------
	## Licensed to the Apache Software Foundation (ASF) under one or more
	## contributor license agreements. See the NOTICE file distributed with
	## this work for additional information regarding copyright ownership.
	## The ASF licenses this file to You under the Apache License, Version 2.0
	## (the "License"); you may not use this file except in compliance with
	## the License. You may obtain a copy of the License at
	##
	## http://www.apache.org/licenses/LICENSE-2.0
	##
	## Unless required by applicable law or agreed to in writing, software
	## distributed under the License is distributed on an "AS IS" BASIS,
	## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	## See the License for the specific language governing permissions and
	## limitations under the License.
	## ---------------------------------------------------------------------------

	# Place ApacheConAsia讲师信息收集表.xlsx, ApacheConSessions.xlsx and drawing1.xml in the project root directory.
	# Running this script generates mapping.csv in that same directory.

	from xml.dom.minidom import parse
	from pypinyin import lazy_pinyin
	import xml.dom.minidom
	import openpyxl
	import csv


	# First, define a CSV file
	# Column names of mapping.csv, in the order the data rows are written later on.
	head = [
		"collect_row", "pic", "zh_name", "en_name", "position",
		"track", "title", "mail", "sessions_row",
	]

	# Start mapping.csv afresh with only the header row.  Mode "w" (instead of
	# "a+") truncates a file left over from an earlier run, so re-running the
	# script does not pile another header and duplicate rows onto old output.
	# The body of the with statement is indented again; it had been flattened.
	with open("./mapping.csv", "w", encoding="utf-8", newline="") as f:
		csvf = csv.writer(f)
		csvf.writerow(head)

	# Parse the drawing XML with minidom.
	DOMTree = xml.dom.minidom.parse("./drawing1.xml")

	# Open the Excel workbooks: the speaker information sheet and the session list.
	collect = openpyxl.load_workbook("./ApacheConAsia讲师信息收集表.xlsx")
	session = openpyxl.load_workbook("./ApacheConSessions.xlsx")

	# Collect every <xdr:twoCellAnchor> element below the document root.
	collection = DOMTree.documentElement
	twoCellAnchors = collection.getElementsByTagName("xdr:twoCellAnchor")

	# Take the string between two fixed symbols
	def get_str_btw(str,begin,end):
		"""Return the text of ``str`` found between ``begin`` and ``end``.

		The search starts after the first occurrence of ``begin`` and stops at
		the first occurrence of ``end`` that follows it.  If ``begin`` does not
		occur, an empty string is returned; if ``end`` does not occur after
		``begin``, everything after ``begin`` is returned.
		"""
		par = str.partition(begin)
		return par[2].partition(end)[0]

	# Get row and pic
	def _to_en_name(name):
		"""Return the ``en_name`` column value for a speaker name.

		A name that passes the CJK range check is transliterated with
		``lazy_pinyin``: the first syllable is used as the family name and the
		remaining syllables are joined into the given name, giving
		``Given_Family``.  Any other value is returned unchanged.
		"""
		# openpyxl reports an empty cell as None; comparing a non-string with a
		# str raises TypeError, so such values are passed through untouched.
		if not isinstance(name, str):
			return name
		# Lexicographic comparison against the \u4e00-\u9fff range, so in
		# practice the outcome is decided by the first character of the name.
		if not ('\u4e00' <= name <= '\u9fff'):
			return name
		syllables = lazy_pinyin(name)
		family = syllables[0]
		given = "".join(syllables[1:])
		return given.capitalize() + "_" + family.capitalize()


	def _index_session_rows(sheet):
		"""Map every e-mail address found in column J of ``sheet`` to its row number.

		Each cell is split on "," and the text between "<" and ">" of every
		piece is taken as the address.  When an address occurs in several rows,
		the last row wins.
		"""
		rows_by_mail = {}
		for cell in sheet['J']:
			text = cell.value
			# Cells without text (None for an empty cell) have no .split();
			# skip them instead of failing with AttributeError.
			if not isinstance(text, str):
				continue
			for entry in text.split(","):
				rows_by_mail[get_str_btw(entry, '<', '>')] = cell.row
		return rows_by_mail


	# The active sheets and the mail -> row index do not change between anchors,
	# so they are looked up once instead of once per picture.
	collect_active = collect.active
	session_active = session.active
	session_rows = _index_session_rows(session_active)

	# Append the data rows below the header written earlier in the script; the
	# file is opened once for the whole loop rather than once per row.
	with open("./mapping.csv", "a+", encoding="utf-8", newline="") as f:
		csvf = csv.writer(f)
		for twoCellAnchor in twoCellAnchors:
			# Row number stored in the anchor's <xdr:to> marker.
			anchor_to = twoCellAnchor.getElementsByTagName('xdr:to')[0]
			row = int(anchor_to.getElementsByTagName('xdr:row')[0].childNodes[0].data)

			# Picture name derived from the r:embed relationship id of the blip.
			pic_node = twoCellAnchor.getElementsByTagName('xdr:pic')[0]
			blip_fill = pic_node.getElementsByTagName('xdr:blipFill')[0]
			blip = blip_fill.getElementsByTagName('a:blip')[0]
			# Leave the column empty when there is no r:embed attribute instead
			# of writing the repr of the DOM element into the CSV.
			pic = ""
			if blip.hasAttribute("r:embed"):
				# [3:] drops the first three characters of the id -- presumably
				# the "rId" prefix; TODO confirm against drawing1.xml.
				pic = "picture" + blip.getAttribute("r:embed")[3:]

			# Name, position, track and e-mail address of the lecturer.
			zh_name = collect_active.cell(row, 3).value
			position = collect_active.cell(row, 6).value
			track = collect_active.cell(row, 9).value
			mail = collect_active.cell(row, 10).value

			en_name = _to_en_name(zh_name)

			# Find the session row by e-mail.  Without a match the row is
			# reported as 0 and the title falls back to the collection sheet.
			session_row = session_rows.get(mail, 0)
			if session_row:
				title = session_active.cell(session_row, 7).value
			else:
				title = collect_active.cell(row, 4).value

			csvf.writerow(
				(row, pic, zh_name, en_name, position, track, title, mail, session_row)
			)