#!/usr/bin/env python3 # -*- coding: latin-1 -*- """ entities.py Transform the file 'http://www.w3.org/TR/html401/sgml/entities.html' read from standard input to an HTML table on standard output. GPLv3 --- Copyright (C) 2018 Olivier Pirson http://www.opimedia.be/ Started the October 6, 2007 v.01.00 --- October 6, 2007 --- February 20, 2008 --- May 19, 2008 v.01.01 --- September 28, 2009 : new website --- March 15, 2010 : new website --- January 2, 2012 : new website v.01.02 --- June 20, 2012 : Python 3 v.02.00 --- November 23, 2018: cleaned Python style """ import re import sys VERSION = 'v.02.00 --- November 23, 2018' # # Main ###### def main(): """ Main """ entities = {} # Read list of entities complete = '' # complete line for line in sys.stdin: complete += line[:-1] if re.search(r'-->\s*$', line) is None: # line is not complete continue match = re.search(r'!ENTITY\s+(\w+)\s.+\#(\d+);.+?-- (.+)\s*,s*(.+)\s*-->\s*$', complete) if match is not None: # entity find character = int(match.group(2)) # entity HTML code assert character not in entities # same code find twice name = match.group(1).strip() description = re.sub(r'\s+', ' ', match.group(3).strip()) hexa = match.group(4).strip() if name == 'lang': description += ' (⟨ U+027E8 for HTML5)' if name == 'rang': description += ' (⟩ U+027E9 for HTML5)' entities[character] = (name, description, hexa) complete = '' print(len(entities), 'entities founded', file=sys.stderr) # Print simple entities list seq = [] for character in sorted(entities): seq.append('&{0};' .format(entities[character][0], character, entities[character][1])) print('
Caractère | Nom / Code HTML | Description | |||
---|---|---|---|---|---|
&{0}; | &{0}; | &{0}; | &#{1}; | {2} | {3} |