| # Licensed to the Apache Software Foundation (ASF) under one or more |
| # contributor license agreements. See the NOTICE file distributed with |
| # this work for additional information regarding copyright ownership. |
| # The ASF licenses this file to You under the Apache License, Version 2.0 |
| # (the "License"); you may not use this file except in compliance with |
| # the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, software |
| # distributed under the License is distributed on an "AS IS" BASIS, |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| # See the License for the specific language governing permissions and |
| # limitations under the License. |
| |
| import sys |
| import os |
| import re |
| |
# All patterns are raw strings so that regex escapes such as \. \w \s are not
# first interpreted (and warned about) as string-literal escapes.

# Text of the first <a ...>...</a> link on a line.
reHREF = re.compile(r'<a.*?>(.*?)</a>', re.IGNORECASE)

# Any single tag; used to strip markup.
reMarkup = re.compile(r'<.*?>')
reDivBlock = re.compile(r'<div class="block">(.*?)</div>', re.IGNORECASE)

# Summary-table captions: <caption><span>...</span> form and "<h3>... Summary</h3>" form.
reCaption = re.compile(r'<caption><span>(.*?)</span>', re.IGNORECASE)
reJ8Caption = re.compile(r'<h3>(.*?) Summary</h3>')

# Item-name extractors for summary-table cells; each captures the name in group 1.
reTDLastNested = re.compile(r'^<td class="colLast"><code><strong><a href="[^>]*\.([^>]*?)\.html" title="class in[^>]*">', re.IGNORECASE)
reTDLast = re.compile(r'^<td class="colLast"><code><strong><a href="[^>]*#([^>]*?)">', re.IGNORECASE)
reColOne = re.compile(r'^<td class="colOne"><code><strong><a href="[^>]*#([^>]*?)">', re.IGNORECASE)
reMemberNameLink = re.compile(r'^<td class="colLast"><code><span class="memberNameLink"><a href="[^>]*#([^>]*?)"', re.IGNORECASE)
reNestedClassMemberNameLink = re.compile(r'^<td class="colLast"><code><span class="memberNameLink"><a href="[^>]*?".*?>(.*?)</a>', re.IGNORECASE)
reMemberNameOneLink = re.compile(r'^<td class="colOne"><code><span class="memberNameLink"><a href="[^>]*#([^>]*?)"', re.IGNORECASE)

# the Method detail section at the end
reMethodDetail = re.compile(r'^<h3>Method Detail</h3>$', re.IGNORECASE)
reMethodDetailAnchor = re.compile(r'^(?:</a>)?<a name="([^>]*?)">$', re.IGNORECASE)
reMethodOverridden = re.compile(r'^<dt><strong>(Specified by:|Overrides:)</strong></dt>$', re.IGNORECASE)

# One opening or closing tag with optional attributes; group 1 is the tag
# name, including the leading '/' for a closing tag.
reTag = re.compile(r"""(?i)<(\/?\w+)((\s+\w+(\s*=\s*(?:".*?"|'.*?'|[^'">\s]+))?)+\s*|\s*)\/?>""")
| |
def verifyHTML(s):
    """Check that the tags found in HTML fragment *s* are properly nested.

    Tags are located with the module-level ``reTag`` pattern.  ``br``, ``li``,
    ``p`` and ``col`` tags are ignored in both opening and closing form.
    Every other opening tag must be closed by a closing tag of the same name
    (compared case-insensitively), most recently opened first.

    Raises:
        RuntimeError: if a closing tag appears while nothing is open, if a
            closing tag does not match the most recently opened tag, or if
            a tag is still open at the end of the fragment.
    """
    ignored = ('br', 'li', 'p', 'col')
    openTags = []  # (lower-cased tag name, full matched tag text)

    for found in reTag.finditer(s):
        name = found.group(1)
        whole = found.group(0)
        closing = name.startswith('/')
        bare = name[1:] if closing else name

        if bare.lower() in ignored:
            continue

        if not closing:
            openTags.append((name.lower(), whole))
            continue

        if not openTags:
            raise RuntimeError('saw closing "%s" without opening <%s...>' % (whole, bare))
        if openTags[-1][0] != bare.lower():
            raise RuntimeError('closing "%s" does not match opening "%s"' % (whole, openTags[-1][1]))
        openTags.pop()

    if openTags:
        raise RuntimeError('"%s" was never closed' % openTags[-1][1])
| |
def cleanHTML(s):
    """Return *s* as plain text: markup removed and common entities decoded.

    Tags are removed with the module-level ``reMarkup`` pattern, then the
    ``&nbsp;``, ``&lt;``, ``&gt;`` and ``&amp;`` entities are replaced by the
    characters they stand for, and surrounding whitespace is stripped.
    """
    s = reMarkup.sub('', s)
    s = s.replace('&nbsp;', ' ')
    # A literal no-break space is normalised the same way as the entity.
    s = s.replace('\u00a0', ' ')
    s = s.replace('&lt;', '<')
    s = s.replace('&gt;', '>')
    # '&amp;' is decoded last so that '&amp;lt;' yields '&lt;', not '<'.
    s = s.replace('&amp;', '&')
    return s.strip()
| |
# Headings that start a line; checkClassDetails records the last <h3> text as
# the category and the last <h4> text as the item.
reH3 = re.compile(r'^<h3>(.*?)</h3>', flags=re.MULTILINE | re.IGNORECASE)
reH4 = re.compile(r'^<h4>(.*?)</h4>', flags=re.MULTILINE | re.IGNORECASE)

# Markers delimiting the details section of a class page.
reDetailsDiv = re.compile(r'<div class="details">')
reEndOfClassData = re.compile(r'<!--.*END OF CLASS DATA.*-->')

# Opening and closing of the <ul> lists used to track nesting depth.
reBlockList = re.compile(r'<ul class="blockList(?:Last)?">')
reCloseUl = re.compile(r'</ul>')
| |
def _verifyDetailBlock(descLines, cat, item, errors):
    """Verify accumulated detail lines; on failure append (cat, item, message) to errors."""
    if not descLines:
        return
    try:
        verifyHTML(''.join(descLines))
    except RuntimeError as e:
        errors.append((cat, item, str(e)))


def checkClassDetails(fullPath):
    """
    Checks for invalid HTML in the full javadocs under each field/method.

    Only the part of the file between the line starting with
    ``<div class="details">`` and the ``END OF CLASS DATA`` comment line is
    examined.  A line starting with ``<ul class="blockList">`` (or
    ``blockListLast``) raises the nesting depth and ends the fragment
    collected so far; a line starting with ``</ul>`` lowers the depth.  Lines
    seen at depth 3 are collected and passed to ``verifyHTML``.  The most
    recent ``<h3>`` / ``<h4>`` heading texts are kept as the category / item
    names used in error reports.

    Prints ``fullPath`` followed by one line per broken fragment and returns
    True if any fragment failed verification; otherwise returns False.
    """
    desc = []
    cat = None
    item = None
    errors = []
    inDetailsDiv = False
    blockListDepth = 0

    # TODO: only works with java7 generated javadocs now!
    with open(fullPath, encoding='UTF-8') as f:
        for line in f:
            # Skip content up until <div class="details">
            if not inDetailsDiv:
                if reDetailsDiv.match(line) is not None:
                    inDetailsDiv = True
                continue

            # Stop looking at content at closing details </div>, which is
            # just before <!-- === END OF CLASS DATA === -->
            if reEndOfClassData.match(line) is not None:
                _verifyDetailBlock(desc, cat, item, errors)
                break

            # <ul class="blockList(Last)"> is the boundary between items
            if reBlockList.match(line) is not None:
                blockListDepth += 1
                _verifyDetailBlock(desc, cat, item, errors)
                del desc[:]

            if blockListDepth == 3:
                desc.append(line)

            if reCloseUl.match(line) is not None:
                blockListDepth -= 1
            else:
                m = reH3.search(line)
                if m is not None:
                    cat = m.group(1)
                else:
                    m = reH4.search(line)
                    if m is not None:
                        item = m.group(1)

    if not errors:
        return False

    print()
    print(fullPath)
    for errCat, errItem, message in errors:
        print(' broken details HTML: %s: %s: %s' % (errCat, errItem, message))
    return True
| |
def checkClassSummaries(fullPath):
    """Check the summary tables of one class page for missing or broken descriptions.

    A table row starts at a line containing ``<tr class="`` or ``<tr id="``
    and ends at a line containing ``</tr>``.  Within a row, the text from a
    line containing ``<div class="block">`` through a line containing
    ``</div>`` is the description: it is checked with ``verifyHTML`` and
    counts as present when non-empty after removing the wrapper.  A row with
    no description is recorded as missing under the most recent caption.

    After the ``Method Detail`` heading, a ``('Methods', anchor)`` entry is
    removed from the missing list when a line containing ``>Overrides:<`` or
    ``>Specified by:<`` follows that anchor.

    Prints ``fullPath`` and one line per problem; returns True if anything is
    missing or broken, False otherwise.

    Raises:
        RuntimeError: if a row without a description ends before any item
            name has been recognised.
    """
    missing = []
    broken = []
    inThing = False
    hasDesc = False
    lastCaption = None
    lastItem = None
    desc = None
    foundMethodDetail = False
    lastMethodAnchor = None

    # Tried in order; the first pattern that matches supplies the item name.
    itemMatchers = (
        reTDLastNested,               # nested classes
        reTDLast,                     # methods etc.
        reColOne,                     # ctors etc.
        reMemberNameLink,             # java 8
        reNestedClassMemberNameLink,  # java 8, nested class
        reMemberNameOneLink,          # java 8 ctors
    )

    # TODO: only works with java7 generated javadocs now!
    # The with-block guarantees the file is closed even when RuntimeError is raised.
    with open(fullPath, encoding='UTF-8') as f:
        for lineCount, line in enumerate(f, 1):
            if reMethodDetail.search(line) is not None:
                foundMethodDetail = True
                continue

            # prune methods that are just @Overrides of other interface/classes,
            # they should be specified elsewhere, if they are e.g. jdk or
            # external classes we cannot inherit their docs anyway
            if foundMethodDetail:
                m = reMethodDetailAnchor.search(line)
                if m is not None:
                    lastMethodAnchor = m.group(1)
                    continue
                isOverrides = '>Overrides:<' in line or '>Specified by:<' in line
                if isOverrides and ('Methods', lastMethodAnchor) in missing:
                    missing.remove(('Methods', lastMethodAnchor))

            m = reCaption.search(line)
            if m is not None:
                lastCaption = m.group(1)
            else:
                m = reJ8Caption.search(line)
                if m is not None:
                    lastCaption = m.group(1)
                    # Normalise to the plural form, e.g. 'Method' -> 'Methods'.
                    if not lastCaption.endswith('s'):
                        lastCaption += 's'

            # Try to find the item in question (method/member name):
            for matcher in itemMatchers:
                m = matcher.search(line)
                if m is not None:
                    lastItem = m.group(1)
                    break

            lineLower = line.strip().lower()

            if '<tr class="' in lineLower or '<tr id="' in lineLower:
                inThing = True
                hasDesc = False
                continue

            if not inThing:
                continue

            if '</tr>' in lineLower:
                if not hasDesc:
                    if lastItem is None:
                        raise RuntimeError('failed to locate javadoc item in %s, line %d? last line: %s' % (fullPath, lineCount, line.rstrip()))
                    missing.append((lastCaption, unEscapeURL(lastItem)))
                inThing = False
                continue

            if '<div class="block">' in line:
                desc = []
            if desc is not None:
                desc.append(line)
                if '</div>' in line:
                    text = ''.join(desc)
                    desc = None

                    try:
                        verifyHTML(text)
                    except RuntimeError as e:
                        broken.append((lastCaption, lastItem, str(e)))

                    text = text.replace('<div class="block">', '')
                    text = text.replace('</div>', '')
                    hasDesc = len(text.strip()) > 0

    if not missing and not broken:
        return False

    print()
    print(fullPath)
    for (caption, item) in missing:
        print(' missing %s: %s' % (caption, item))
    for (caption, item, why) in broken:
        print(' broken HTML: %s: %s: %s' % (caption, item, why))
    return True
| |
def checkSummary(fullPath):
    """Check a package-summary / overview-summary page for missing descriptions.

    Three kinds of problem are reported, each printed under a one-time header
    consisting of a blank line and ``fullPath``:

    * no package description: after a line starting with ``package `` or
      ``<h1 title="package" ``, the lines up to the next line starting with
      ``<table ``, ``<b>see: `` or ``<p>see:`` contain no text once markup
      is removed;
    * a blank table cell, reported with the text of the most recent link;
    * a line containing the Apache license text, reported the same way.

    Returns True if any problem was reported, False otherwise.

    Raises:
        RuntimeError: if the end of the description was never found and
            ``fullPath`` does not contain ``/overview-summary.html``.
    """
    printed = False
    anyMissing = False
    sawPackage = False
    desc = []
    lastHREF = None

    # Lines are compared without entity decoding, so the '&nbsp;' spelling of
    # a blank cell is accepted alongside the literal-whitespace spellings.
    blankCells = (
        '<td>&nbsp;</td>',
        '<td> </td>',
        '<td>\u00a0</td>',
        '<td></td>',
        '<td class="collast">&nbsp;</td>',
        '<td class="collast"> </td>',
        '<td class="collast">\u00a0</td>',
    )

    def report(message):
        """Print *message*, preceded by the file header the first time."""
        nonlocal printed, anyMissing
        if not printed:
            print()
            print(fullPath)
            printed = True
        print(message)
        anyMissing = True

    # The with-block guarantees the file is closed on every exit path.
    with open(fullPath, encoding='UTF-8') as f:
        for line in f:
            lineLower = line.strip().lower()
            if desc is not None:
                # TODO: also detect missing description in overview-summary
                if lineLower.startswith(('package ', '<h1 title="package" ')):
                    sawPackage = True
                elif sawPackage:
                    if lineLower.startswith(('<table ', '<b>see: ', '<p>see:')):
                        text = reMarkup.sub(' ', ' '.join(desc)).strip()
                        if text == '':
                            report(' no package description (missing package.html in src?)')
                        # None marks the description as located and finished.
                        desc = None
                    else:
                        desc.append(lineLower)

            if lineLower in blankCells:
                report(' missing description: %s' % unescapeHTML(lastHREF))
            elif ('licensed to the apache software foundation' in lineLower
                  or 'copyright 2004 the apache software foundation' in lineLower):
                report(' license-is-javadoc: %s' % unescapeHTML(lastHREF))

            m = reHREF.search(line)
            if m is not None:
                lastHREF = m.group(1)

    if desc is not None and fullPath.find('/overview-summary.html') == -1:
        raise RuntimeError('BUG: failed to locate description in %s' % fullPath)
    return anyMissing
| |
def unEscapeURL(s):
    """Return *s* with URL percent-escapes decoded (e.g. ``%20`` -> space).

    All ``%XX`` escapes are decoded, not only ``%20``; text without valid
    escapes is returned unchanged.
    """
    # Imported here so the function does not rely on a new module-level import.
    from urllib.parse import unquote
    return unquote(s)
| |
def unescapeHTML(s):
    """Return *s* with the ``&lt;``, ``&gt;`` and ``&amp;`` entities decoded."""
    s = s.replace('&lt;', '<')
    s = s.replace('&gt;', '>')
    # '&amp;' is decoded last so that '&amp;lt;' yields '&lt;', not '<'.
    s = s.replace('&amp;', '&')
    return s
| |
def checkPackageSummaries(root, level='class'):
    """
    Checks the javadoc HTML under root for missing or broken documentation;
    returns True if there are problems.

    level must be one of 'class', 'package', 'method' or 'none'; any other
    value prints a message and exits with status 1.

    For each directory containing package-summary.html:
      * the package summary is checked when level is 'class' or 'method';
      * every other .html file whose name does not start with 'package-' is
        checked for broken details HTML (at every level) and, when level is
        'method', for missing summary descriptions.
    overview-summary.html is checked unless level is 'none'.  Directories
    whose path contains '/all/' are skipped.

    If root is not a directory it is checked as a single class file and the
    process exits: status 1 if a problem was found, 0 otherwise.
    """

    if level not in ('class', 'package', 'method', 'none'):
        print('unsupported level: %s, must be "class" or "package" or "method" or "none"' % level)
        sys.exit(1)

    if not os.path.isdir(root):
        # Run both checks unconditionally so every problem is printed, then
        # reflect the outcome in the exit status.
        summariesBad = checkClassSummaries(root)
        detailsBad = checkClassDetails(root)
        sys.exit(1 if (summariesBad or detailsBad) else 0)

    anyMissing = False
    for dirPath, _dirNames, fileNames in os.walk(root):

        if '/all/' in dirPath:
            # These are dups (this is a bit risk, eg, root IS this /all/ directory..)
            continue

        if 'package-summary.html' in fileNames:
            if level in ('class', 'method') and checkSummary('%s/package-summary.html' % dirPath):
                anyMissing = True
            for fileName in fileNames:
                fullPath = '%s/%s' % (dirPath, fileName)
                if not fileName.startswith('package-') and fileName.endswith('.html') and os.path.isfile(fullPath):
                    if level == 'method':
                        if checkClassSummaries(fullPath):
                            anyMissing = True
                    # always look for broken html, regardless of level supplied
                    if checkClassDetails(fullPath):
                        anyMissing = True

        if 'overview-summary.html' in fileNames:
            if level != 'none' and checkSummary('%s/overview-summary.html' % dirPath):
                anyMissing = True

    return anyMissing
| |
if __name__ == '__main__':
    # Usage: <script> <dir> [level]; level defaults to 'class'.
    argCount = len(sys.argv)
    if not 2 <= argCount <= 3:
        print('usage: %s <dir> [none|package|class|method]' % sys.argv[0])
        sys.exit(1)

    level = sys.argv[2] if argCount == 3 else 'class'

    # Exit status 1 signals that problems were reported.
    if checkPackageSummaries(sys.argv[1], level):
        print()
        print('Missing javadocs were found!')
        sys.exit(1)
    sys.exit(0)