#!/usr/bin/perl
#
# update_mime_types.pl: Read an existing Apache mime.types file and
# merge its entries with any new types discovered within an
# IANA media-types.xml file (see below for obtaining it).
#
# All existing mime.types entries are preserved as is (aside from sorting).
# Any new registered types are merged as a commented-out entry without
# an assigned extension, and then the entire file is printed to stdout.
#
# Typical use would be something like:
#
# wget -N https://www.iana.org/assignments/media-types/media-types.xml
# ./update_mime_types.pl mime.types > new.types
# diff -u mime.types new.types ; check the differences
# rm mime.types && mv new.types mime.types ; only if diffs are good
#
# Note that we assume most files are in the current working directory
# and efficiency is not an issue. The first argument, if any, is the
# path to the existing mime.types file (or its directory).
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Default input names; both are looked for in the current working directory.
my $mity = 'mime.types';
my $medy = 'media-types.xml';
# An optional first argument overrides where mime.types is found: a
# directory means "mime.types inside that directory", anything else is
# taken as the path of the file itself. media-types.xml is always read
# from the current working directory.
if (@ARGV) {
    my $where = $ARGV[0];
    $mity = (-d $where) ? $where . '/' . $mity : $where;
}
# Refuse to start unless both inputs exist (mime.types is checked first).
foreach my $needed ($mity, $medy) {
    die "no $needed here\n" unless (-e $needed);
}
my $in_head = 1;
my @header = ();
my %mtype = ();
# Read through the Apache httpd mime.types file. Every line up to and
# including the "# =========" separator is kept verbatim in @header.
# Each later line that names a media type (optionally commented out
# with a leading "# ") is stored in %mtype, keyed on the full
# "major/minor" type name. We save the entire input line as the hash
# value so that existing configs won't change when output.
# We assume the type names are already lowercased tokens.
# Any other line is reported on stderr and is not carried over.
#
# A lexical filehandle and a named line variable are used instead of a
# bareword handle and $_, so nothing here lives in a package global.
#
open(my $mime_fh, "<", $mity) or die "cannot open $mity: $!";
while (my $line = <$mime_fh>) {
    if ($in_head) {
        push @header, $line;
        # The separator line itself is the last line of the header.
        if ($line =~ /^# =========/) {
            $in_head = 0;
        }
        next;
    }
    if ($line =~ /^(# )?([a-z_\+\-\.]+\/\S+)/) {
        # $2 is the type name; a later duplicate replaces an earlier one.
        $mtype{$2} = $line;
    }
    else {
        warn "Skipping: ", $line;
    }
}
close $mime_fh;
# Read through the IANA media types registry, in XML form, and extract
# whatever looks to be a registered type based on the element structure.
# Yes, this is horribly fragile, but the format isn't expected to change.
#
# Each <registry id="..."> line sets the current major type; each
# <name>...</name> line under it yields a lowercased "major/name" type.
# Types not already present in %mtype are added as commented-out entries
# with no extension. Lines mentioning OBSOLETE or DEPRECATE are ignored,
# as is everything before the first registry other than 'examples', and
# reading stops at the <people> section.
#
# A lexical filehandle and a named line variable are used instead of a
# bareword handle and $_, so nothing here lives in a package global.
#
open(my $iana_fh, "<", $medy) or die "cannot open $medy: $!";
my $major = 'examples';
my $thistype = '';
while (my $line = <$iana_fh>) {
    last if ($line =~ /^\s*<people>/);
    next if ($line =~ /(OBSOLETE|DEPRECATE)/);
    if ($line =~ /^\s*<registry id="([a-z_\+\-\.]+)"/) {
        $major = $1;
        next;
    }
    # 'examples' is both the initial value and a registry id to skip.
    next if ($major eq 'examples');
    if ($line =~ /^\s*<name>([^<]+)<\/name>/) {
        $thistype = lc "$major/$1";
        # Never overwrite a line that came from the existing mime.types.
        if (!defined($mtype{$thistype})) {
            $mtype{$thistype} = "# $thistype\n";
        }
    }
}
close $iana_fh;
# Finally, output a replacement for Apache httpd's mime.types file:
# the preserved header lines, followed by every stored entry line in
# sorted order of its type name.
#
print @header;
# The loop variable is declared with "my" so it is lexical to the loop
# rather than an implicitly created package global.
foreach my $key (sort(keys %mtype)) {
    print $mtype{$key};
}
exit 0;