blob: 3051eb2a9e2723365b93310e2a267b0f95ffd7e7 [file]
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""
Module: download_data
This module provides utilities for downloading the datasets for training LipNet
"""
import os
from os.path import exists
from multi import multi_p_run, put_worker
def download_mp4(from_idx, to_idx, _params):
"""
download mp4s
"""
succ = set()
fail = set()
for idx in range(from_idx, to_idx):
name = 's' + str(idx)
save_folder = '{src_path}/{nm}'.format(src_path=_params['src_path'], nm=name)
if idx == 0 or os.path.isdir(save_folder):
continue
script = "http://spandh.dcs.shef.ac.uk/gridcorpus/{nm}/video/{nm}.mpg_vcd.zip".format( \
nm=name)
down_sc = 'cd {src_path} && curl {script} --output {nm}.mpg_vcd.zip && \
unzip {nm}.mpg_vcd.zip'.format(script=script,
nm=name,
src_path=_params['src_path'])
try:
print(down_sc)
os.system(down_sc)
succ.add(idx)
except OSError as error:
print(error)
fail.add(idx)
return (succ, fail)
def download_align(from_idx, to_idx, _params):
"""
download aligns
"""
succ = set()
fail = set()
for idx in range(from_idx, to_idx):
name = 's' + str(idx)
if idx == 0:
continue
script = "http://spandh.dcs.shef.ac.uk/gridcorpus/{nm}/align/{nm}.tar".format(nm=name)
down_sc = 'cd {align_path} && wget {script} && \
tar -xvf {nm}.tar'.format(script=script,
nm=name,
align_path=_params['align_path'])
try:
print(down_sc)
os.system(down_sc)
succ.add(idx)
except OSError as error:
print(error)
fail.add(idx)
return (succ, fail)
if __name__ == '__main__':
import argparse
PARSER = argparse.ArgumentParser()
PARSER.add_argument('--src_path', type=str, default='../data/mp4s')
PARSER.add_argument('--align_path', type=str, default='../data')
PARSER.add_argument('--n_process', type=int, default=1)
CONFIG = PARSER.parse_args()
PARAMS = {'src_path': CONFIG.src_path, 'align_path': CONFIG.align_path}
N_PROCESS = CONFIG.n_process
if exists('./shape_predictor_68_face_landmarks.dat') is False:
os.system('wget http://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2 && \
bzip2 -d shape_predictor_68_face_landmarks.dat.bz2')
os.makedirs('{src_path}'.format(src_path=PARAMS['src_path']), exist_ok=True)
os.makedirs('{align_path}'.format(align_path=PARAMS['align_path']), exist_ok=True)
if N_PROCESS == 1:
RES = download_mp4(0, 35, PARAMS)
RES = download_align(0, 35, PARAMS)
else:
# download movie files
RES = multi_p_run(tot_num=35, _func=put_worker, worker=download_mp4, \
params=PARAMS, n_process=N_PROCESS)
# download align files
RES = multi_p_run(tot_num=35, _func=put_worker, worker=download_align, \
params=PARAMS, n_process=N_PROCESS)
os.system('rm -f {src_path}/*.zip && rm -f {src_path}/*/Thumbs.db'.format( \
src_path=PARAMS['src_path']))
os.system('rm -f {align_path}/*.tar && rm -f {align_path}/Thumbs.db'.format( \
align_path=PARAMS['align_path']))