blob: 52e2771c5024ba224e89a3c84c9de848d2eb06b7 [file] [log] [blame]
#!/usr/bin/python -B
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
#
# asfdata.py -- Pelican plugin that processes a yaml specification of data into a setting directory
#
from __future__ import unicode_literals
import pelican.plugins.signals
import pelican.utils
import os.path
import requests
import random
import yaml
import json
import ezt
# Default plugin settings. Site configs override these by defining
# ASF_DATA in pelicanconf; see config_read_data() below.
#   metadata - dict that receives the processed sequences/values
#   debug    - when True, process_sequence() prints each directive
ASF_DATA = {
    'metadata': { },
    'debug': False
}
def read_config(config_yaml):
    """Read the data-specification YAML file and return it as a dict.

    :param config_yaml: path to the YAML specification (opened via
        pelican's own file helper so retry/encoding behavior matches
        the rest of the site build)
    :return: the parsed configuration mapping
    """
    with pelican.utils.pelican_open(config_yaml) as text:
        # safe_load: plain YAML only. The Loader-less yaml.load() form is
        # deprecated (PyYAML 5.x) and removed in PyYAML 6, and this file
        # never needs arbitrary python-object tags.
        config_data = yaml.safe_load(text)
    print(config_data)
    return config_data
def url_data(url):
    """Fetch a remote .json or .yaml resource and return the parsed data.

    The parser is chosen from the URL's file extension; any other
    extension yields an empty dict.

    :param url: URL of the resource to download
    :return: parsed content (dict/list), or {} for unknown extensions
    """
    content = requests.get(url).text
    parts = url.split('/')
    extension = os.path.splitext(parts[-1])[1]  # split off ext, keep ext
    print(f"Loading {extension} from {url}")
    if extension == ".json":
        load = json.loads(content)
    elif extension == ".yaml":
        # SECURITY: this content comes off the network. safe_load refuses
        # arbitrary python-object tags, which the old Loader-less
        # yaml.load() would happily construct.
        load = yaml.safe_load(content)
    else:
        load = { }
    return load
def remove_part(reference, part):
    """Depth-first removal of the key *part* from a nested dict.

    At each dict level: if *part* is reached while scanning the keys in
    order, delete it and stop processing that subtree entirely;
    otherwise recurse into every dict-valued child (so one occurrence
    per sibling subtree can be removed).
    """
    for key in list(reference):
        if key == part:
            del reference[part]
            return
        value = reference[key]
        if isinstance(value, dict):
            remove_part(value, part)
def where_parts(reference, part):
    """Drop every entry whose *part* attribute is falsy (in place).

    Keys are collected first, then deleted, so the dict is never
    mutated while being iterated.
    """
    # currently only works on True parts
    doomed = [key for key, entry in reference.items() if not entry[part]]
    for key in doomed:
        del reference[key]
def alpha_part(reference, part):
    """Annotate each entry with a 'letter' key for alphabetic grouping.

    The letter is the upper-cased first character of the entry's *part*
    value, except 'HTTP Server', which gets ' ' so it sorts ahead of
    every letter when the sequence is ordered alphabetically.
    """
    for entry in reference.values():
        name = entry[part]
        # space sorts before any letter, forcing HTTP Server first
        entry['letter'] = ' ' if name == 'HTTP Server' else name[0].upper()
def asfid_part(reference, part):
    """Flatten the roster/chair pattern {availid: {'name': ...}}.

    Replaces entry[part] with the person's display name and stores the
    apache id under 'availid'. If the inner mapping has several entries,
    the one iterated last wins (matches the original behavior).
    """
    for entry in reference.values():
        mapping = entry[part]
        for availid, details in mapping.items():
            name = details['name']
        entry[part] = name
        entry['availid'] = availid
def sequence_dict(seq, reference):
    """Convert a dict of dicts into a list of attribute objects.

    Each dict-valued entry gains a 'key_id' field (its key), has its
    bool fields wrapped with ezt.boolean() for templating, and is then
    turned into a class named *seq* whose class attributes are the
    entry's fields. Non-dict entries are skipped.
    """
    result = []
    for key, entry in reference.items():
        if not isinstance(entry, dict):
            continue
        entry['key_id'] = key
        for field, val in entry.items():
            if isinstance(val, bool):
                # ezt templates need ezt.boolean, not python bool
                entry[field] = ezt.boolean(val)
        result.append(type(seq, (), entry))
    return result
def split_list(metadata, seq, reference, split):
    """Sort a sequence alphabetically and split it into *split* columns.

    Letter-header rows (objects whose display_name is the letter) are
    inserted where the letter changes, and the columns are stored in
    metadata as f"{seq}_0" .. f"{seq}_{split-1}".

    NOTE(review): the column sizing assumes up to 26 letter headers get
    inserted (the '+26' terms). When the data covers fewer letters, the
    computed end can exceed the real row count, so the consumer loop is
    bounded by nseq < size to avoid an IndexError (trailing columns may
    then simply come out shorter or empty).
    """
    # copy sequence
    sequence = list(reference)
    # sort the copy; 'letter' first so the ' ' special case leads
    sequence.sort(key=lambda x: (x.letter, x.display_name))
    # size of list
    size = len(sequence)
    # size of columns (ceiling division over items plus header rows)
    percol = int((size+26+split-1)/split)
    # positions
    start = nseq = nrow = 0
    letter = ' '
    for column in range(split):
        subsequence = [ ]
        end = min(size+26, start+percol)
        # guard nseq < size: without it, data with fewer than 26 distinct
        # letters overruns `sequence` and raises IndexError
        while nrow < end and nseq < size:
            if letter < sequence[nseq].letter:
                # new letter: emit a header row, do not consume the item
                letter = sequence[nseq].letter
                subsequence.append(type(seq, (), { 'letter': letter, 'display_name': letter }))
            else:
                subsequence.append(sequence[nseq])
                nseq = nseq+1
            nrow = nrow+1
        # save the column sequence in the metadata
        metadata[f"{seq}_{column}"] = subsequence
        start = end
    if nseq < size:
        print(f"WARNING: {seq} not all of sequence consumed: short {size-nseq} projects")
def process_sequence(metadata, seq, sequence, load, debug):
    """Apply one named sequence specification to the loaded data.

    *sequence* is a dict of directives (path, where, trim, asfid, alpha,
    dictionary, sequence, random, split, description) from the YAML spec;
    they are applied in the fixed order below, so directive order in the
    YAML does not matter. The result is normally stored in
    metadata[seq]; 'split' stores per-column keys instead.

    :param metadata: dict receiving the finished sequences
    :param seq: name of this sequence (metadata key / generated class name)
    :param sequence: directive mapping for this sequence
    :param load: the full data loaded from the url
    :param debug: when True, print each directive as it is applied
    """
    reference = load
    # has been converted to a sequence
    is_sequence = False
    # has been converted to a dictionary - won't be made into a sequence
    is_dictionary = False
    # save metadata at the end
    save_metadata = True
    # description
    if 'description' in sequence:
        print(f"{seq}: {sequence['description']}")
    # select sub dictionary
    if 'path' in sequence:
        if debug:
            print(f"path: {sequence['path']}")
        # dotted path, e.g. "a.b.c", walks nested dicts
        parts = sequence['path'].split('.')
        for part in parts:
            reference = reference[part]
    # filter dictionary by attribute value. if filter is false discard
    if 'where' in sequence:
        if debug:
            print(f"where: {sequence['where']}")
        where_parts(reference, sequence['where'])
    # remove irrelevant keys
    if 'trim' in sequence:
        if debug:
            print(f"trim: {sequence['trim']}")
        # comma-separated list of keys to strip recursively
        parts = sequence['trim'].split(',')
        for part in parts:
            remove_part(reference, part)
    # transform roster and chair patterns
    if 'asfid' in sequence:
        if debug:
            print(f"asfid: {sequence['asfid']}")
        asfid_part(reference, sequence['asfid'])
    # add first letter for alphabetic categories
    if 'alpha' in sequence:
        if debug:
            print(f"alpha: {sequence['alpha']}")
        alpha_part(reference, sequence['alpha'])
    # this dictionary is derived from sequences
    if 'dictionary' in sequence:
        if debug:
            print(f"dictionary: {sequence['dictionary']}")
        # merge the named top-level sections of `load` into one dict
        reference = { }
        paths = sequence['dictionary'].split(',')
        for path in paths:
            for key in load[path]:
                reference[key] = load[path][key]
        is_dictionary = True
    # this sequence is derived from another sequence
    if 'sequence' in sequence:
        if debug:
            print(f"sequence: {sequence['sequence']}")
        # NOTE: the referenced sequence must already be in metadata,
        # i.e. it must appear earlier in the YAML spec
        reference = metadata[sequence['sequence']]
        is_sequence = True
    # this sequence is a random sample of another sequence
    if 'random' in sequence:
        if debug:
            print(f"random: {sequence['random']}")
        if is_sequence:
            reference = random.sample(reference, sequence['random'])
        else:
            print(f"{seq} - random requires an existing sequence to sample")
    # this sequence is a sorted list divided into multiple columns
    if 'split' in sequence:
        if debug:
            print(f"split: {sequence['split']}")
        if is_sequence:
            # split_list stores f"{seq}_{column}" keys itself
            split_list(metadata, seq, reference, sequence['split'])
            save_metadata = False
        else:
            print(f"{seq} - split requires an existing sequence to split")
    # convert the dictionary to a sequence of objects
    if not is_sequence and not is_dictionary:
        if debug:
            print(f"{seq}: create sequence")
        reference = sequence_dict(seq, reference)
    # save sequence in metadata
    if save_metadata:
        metadata[seq] = reference
def process_load(metadata, value, key, load, debug):
    """Process every sequence spec under one data source.

    *value* is a source entry from the YAML spec: its 'url' key was
    already consumed by the caller, and every other key names a
    sequence specification to apply against *load*.
    """
    for seq, spec in value.items():
        if seq == 'url':
            continue
        # sequence
        process_sequence(metadata, seq, spec, load, debug)
def config_read_data(pelican):
    """Pelican 'initialized' signal handler: build ASF_DATA metadata.

    Reads the ASF_DATA setting (falling back to this module's defaults),
    loads the YAML data spec named by its 'data' key, fetches each
    source's url, processes all sequences, and stores the result back
    into pelican.settings['ASF_DATA']['metadata'] for use by templates.
    """
    from pelican.settings import DEFAULT_CONFIG
    print("-----\nasfdata")
    # make sure both the global defaults and this instance have ASF_DATA
    DEFAULT_CONFIG.setdefault('ASF_DATA', ASF_DATA)
    if pelican:
        pelican.settings.setdefault('ASF_DATA', ASF_DATA)
    asf_data = pelican.settings.get('ASF_DATA', DEFAULT_CONFIG['ASF_DATA'])
    for key in asf_data:
        print(f"config: [{key}] = {asf_data[key]}")
    # start from any metadata the site config already provided
    if 'metadata' in asf_data:
        metadata = asf_data['metadata']
    else:
        metadata = { }
    if 'data' in asf_data:
        print(f"Processing {asf_data['data']}")
        config_data = read_config(asf_data['data'])
        for key in config_data:
            value = config_data[key]
            if isinstance(value, dict):
                print(f"{key} is a dict")
                print(value)
                if 'url' in value:
                    # remote source: fetch once, then process its sequences
                    load = url_data(value['url'])
                    process_load(metadata, value, key, load, asf_data['debug'])
                else:
                    # plain mapping: copy straight into the metadata
                    metadata[key] = value
            else:
                # scalar value: copy straight into the metadata
                print(f"{key} = {value}")
                metadata[key] = value
    pelican.settings['ASF_DATA']['metadata'] = metadata
    print("-----")
    for key in metadata:
        print(f"metadata[{key}] =")
        print(metadata[key])
    print("-----")
def register():
    # Pelican plugin entry point: run config_read_data once the
    # settings have been initialized.
    pelican.plugins.signals.initialized.connect(config_read_data)