D7net
Home
Console
Upload
information
Create File
Create Folder
About
Tools
:
/
opt
/
cloudlinux
/
venv
/
lib64
/
python3.11
/
site-packages
/
guppy
/
gsl
/
Filename :
XHTML.py
back
Copy
class Node2XHTML:
    """Visitor that renders a GSL node tree as an XHTML 1.0 document.

    Nodes dispatch to the ``visit_<tag>`` methods through ``node.accept``.
    Output fragments are collected in ``self.out`` and joined by
    :meth:`get_html`.
    """

    def __init__(self, mod, node=None, error_report=None, encode_name=None):
        """Set up the renderer and, if *node* is given, render it at once.

        mod          -- the glue module object (see ``_GLUECLAMP_``) that
                        supplies tables, ``encode`` and ``node_of_taci``.
        node         -- optional root node; it is visited immediately.
        error_report -- optional callable replacing :meth:`error_report`.
        encode_name  -- optional callable replacing ``mod.encode_name``.
        """
        self.mod = mod
        self.valid_html40 = False
        self.encode = self.mod.encode
        if encode_name is None:
            encode_name = self.mod.encode_name
        self.encode_name = encode_name
        if error_report is not None:
            # Shadows the default method of the same name on this instance.
            self.error_report = error_report
        self.document_lang = None
        self.header_nodes = []
        self.indent = 0
        self.indentstep = 1
        self.set_out([])

        # xxx where do this?
        charset = 'utf-8'
        self.header_nodes.append(self.mod.node_of_taci(
            'meta', '', (
                self.mod.node_of_taci('http-equiv=', 'Content-Type'),
                self.mod.node_of_taci('content=',
                                      'text/html; charset=%s' % charset))))

        if node is not None:
            node.accept(self)

    def _visit_children(self, node):
        """Visit every child of *node*, continuing past reported errors."""
        node, attrs = node.split_attrs()
        # xxx handle attrs?
        E = self.mod.ReportedError
        for ch in node.children:
            try:
                ch.accept(self)
            except E:
                # Deliberate best-effort: keep rendering the siblings after
                # one child raised ReportedError.
                pass

    def _gen_list(self, tag, node):
        """Emit a <tag> list with one <li> element per child of *node*."""
        self.begin(tag)
        for c in node.children:
            self.begin('li')
            c.accept(self)
            self.end('li')
        self.end(tag)

    def begin(self, tag, arg=''):
        """Append the start tag for *tag* and increase the indentation.

        *arg* is inserted verbatim as the attribute text.  Elements listed
        in ``mod._no_end_tag_elements`` are written self-closed.
        """
        t = '<' + tag
        if arg:
            t = t + ' ' + arg
        if tag in self.mod._no_end_tag_elements:
            # Quote from: http://gutenberg.hwg.org/markupXHTML.html
            # N.B. The penultimate closing slash on empty elements such as
            # the <img/> element can cause a problem in older browsers. For
            # this reason it is recommended that you leave a space before
            # the slash, namely <img />
            t += ' /'
        t += '>'
        if tag in self.mod.line_break_allowed:
            t = '\n' + self.indent * ' ' + t
        self.append(t)
        self.indent += self.indentstep

    def chg_out(self, out):
        """Install *out* as the output list and return the previous one."""
        oo = self.out
        self.set_out(out)
        return oo

    def encode_link_name(self, name):
        """Return *name* stripped of the target prefix and name-encoded."""
        # 1. Make the name better looking for a html user's perspective
        # 2. Encode it by HTML rules
        if name.startswith(self.mod.tgt_prefix):
            name = name[len(self.mod.tgt_prefix):]
        else:
            # Should not happen often or at all
            assert 0, 'link name %r lacks the target prefix' % (name,)
        name = self.encode_name(name)
        return name

    def end(self, tag):
        """Decrease the indentation and append the end tag for *tag*."""
        self.indent -= self.indentstep
        self.append('</%s>' % tag)

    def error(self, msg, *args, **kwds):
        """Report *msg* through ``self.error_report`` with a fixed prefix."""
        msg = 'Doc2XHTML: ' + msg
        self.error_report(msg, *args, **kwds)

    def error_report(self, msg, *args, **kwds):
        """Default reporter: print the message and raise ValueError."""
        print('HTML ENCODING ERROR: ', msg, 'args=', args, 'kwds=', kwds)
        raise ValueError

    def gen_document_header(self, lang, header_nodes):
        """Emit the DOCTYPE, <html>, <head> and the opening of <body>.

        lang         -- node whose text becomes the ``lang`` attribute.
        header_nodes -- nodes rendered inside <head> via gen_stdhtml.
        """
        # lang & title are nodes with text or char directives, to be encoded.
        # metas is a list of nodes, with data to be encoded
        # we have alternatives, I just havent yet decided how or if to let
        # the user choose
        strict = 1
        # NOTE: whitespace inside the DOCTYPE literals was reconstructed; it
        # is not significant to the declaration.
        if strict:
            self.append("""\
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
   "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
""")
        else:
            self.append("""\
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
   "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
""")
        self.begin(
            'html',
            'lang=%r xmlns="http://www.w3.org/1999/xhtml"'
            % self.get_encoded_text(lang),
        )
        self.begin('head')
        for node in header_nodes:
            self.gen_stdhtml(node)
        self.end('head')
        self.begin('body')
        # Get around w3c restriction that character data are not allowed
        # directly in body, makes it easier to write compliant code
        # Arguably the restriction is there for a reason, but I dont know...
        self.begin('div')

    def gen_document_trailer(self):
        """Close the elements left open by :meth:`gen_document_header`."""
        self.end('div')
        self.end('body')
        self.end('html')

    def gen_empty_elmt(self, tag, arg=''):
        """Emit an element that has no content and no end tag."""
        self.begin(tag, arg)
        # begin() indented for content that will never come; undo it.
        self.indent -= self.indentstep

    def gen_generated_from_gsl(self):
        """Emit a rule followed by a 'Generated by GSL-XHTML' footer."""
        self.gen_empty_elmt('hr')
        self.append('Generated by ')
        self.begin(
            'a', 'href="https://zhuyifei1999.github.io/guppy3/gsl.html"')
        self.append('GSL-XHTML 0.1.7')
        self.end('a')
        self.append(
            ' on ' + self.mod.time.asctime(self.mod.time.localtime()))

    def gen_meta(self, node, tag=None):
        """Queue a <meta name=TAG content=...> header built from *node*."""
        mknode = self.mod.node_of_taci
        if tag is None:
            tag = node.tag
        self.header_nodes.append(
            mknode('meta', '', [
                mknode('name=', tag),
                mknode('content=', node.arg, node.children)]))

    def gen_stdhtml(self, node, tag=None, **options):
        """Emit *node* as the HTML element *tag* (default: ``node.tag``).

        Attributes come from ``node.split_attrs(tag)``.  Elements without
        an end tag must have neither text nor children; violations are
        reported through :meth:`error`.
        """
        if tag is None:
            tag = node.tag
        node, attrs = node.split_attrs(tag)
        self.begin(
            tag, ' '.join(['%s=%r' % (key, val) for (key, val) in attrs]))
        if tag in self.mod._no_end_tag_elements:
            if node.arg:
                self.error(
                    'No enclosed text allowed for Html tag: %r.' % node.tag)
            self.no_children(node)
            self.indent -= self.indentstep
        else:
            node.arg_accept(self)
            self.end(tag)

    def get_encoded_text(self, node):
        """Return the encoded text of *node*'s arg and text/char children.

        Output is captured in a temporary list; the previous output list is
        restored even when a child or :meth:`error` raises.
        """
        # From a node's arg and children that are text or characters
        old_out = self.chg_out([])
        try:
            self.append(self.encode(node.arg))
            for ch in node.children:
                if ch.tag in ('text', 'char'):
                    ch.accept(self)
                else:
                    self.error(
                        'Only text and char allowed here, not %r.' % ch.tag,
                        ch)
        finally:
            captured = self.chg_out(old_out)
        return ''.join(captured)

    def get_html(self):
        """Return everything generated so far as a single string."""
        return ''.join(self.out)

    def no_children(self, node):
        """Report an error if *node* has any children."""
        if node.children:
            self.error(
                'No children allowed for %r. Got children nodes = %r.' % (
                    node.tag, node.children))

    def set_out(self, out):
        """Make the list *out* the output target and cache its methods."""
        self.out = out
        self.extend = out.extend
        self.append = out.append

    def visit_author(self, node):
        self.gen_meta(node)

    def visit_block(self, node):
        self._visit_children(node)

    def visit_char(self, node):
        """Emit a character reference for a named or numeric character.

        The name may be an entity name known to ``mod.name2codepoint``, a
        ``0x``-prefixed hexadecimal code point or a decimal code point.
        Anything else is reported through :meth:`error`.
        """
        name = node.get_namearg()
        if name in self.mod.name2codepoint:
            name = '&%s;' % name
        else:
            char = None
            if name[:2] == "0x":
                try:
                    char = int(name[2:], 16)
                except ValueError:
                    # Malformed hex digits: reported below.
                    char = None
            elif name.isdigit():
                char = int(name)
            if char is None:
                self.error('No such character: %r.' % name, node)
                # Only reached when the reporter does not raise; there is
                # no code point to emit.
                return
            name = self.mod.codepoint2name.get(char)
            if name is None:
                name = '&#%d;' % char
            else:
                name = '&%s;' % name
        self.append(name)
        self._visit_children(node)

    def visit_col_width(self, node):
        self.append('<col width="%s" />' % node.arg)

    def visit_comment(self, node):
        """Comments are intentionally not copied to the output."""
        return

    def visit_default(self, node):
        """Handle tags that have no dedicated ``visit_`` method.

        Standard HTML tags are emitted directly, except head elements,
        which are queued for the document header.  Unknown tags are
        reported as errors.
        """
        if node.tag in self.mod.stdhtml:
            if node.tag in self.mod._head_elements:
                self.header_nodes.append(node)
            else:
                self.gen_stdhtml(node)
        else:
            self.error(
                'I don\'t know what to generate for the tag %r.' % node.tag,
                node)

    def visit_define(self, node):
        """Emit a named anchor for a definition."""
        name = self.encode_link_name(node.arg)
        self.begin('a', 'name=%r' % name)
        self._visit_children(node)
        self.end('a')

    def visit_document(self, node):
        """Render a whole document: header, body content, trailer.

        The body is rendered first into a temporary list because visiting
        it collects the header nodes and the document language.
        """
        self.indent = 2  # Known indentation of header to be generated later
        oldout = self.chg_out([])
        self._visit_children(node)
        newout = self.chg_out(oldout)
        mknode = self.mod.node_of_taci
        lang = self.document_lang
        if not lang:
            lang = mknode('document_lang', 'en')
        self.indent = 0
        self.gen_document_header(lang, self.header_nodes)
        self.out.extend(newout)
        self.gen_document_trailer()

    def visit_document_lang(self, node):
        if self.document_lang is not None:
            self.error('Duplicate document lang directive.', node)
        self.document_lang = node

    def visit_document_title(self, node):
        self.header_nodes.append(self.mod.node_of_taci('title', node.arg))

    def visit_enumerate(self, node):
        self._gen_list('ol', node)

    def visit_exdefs(self, node):
        """Record, for each comma-separated symbol, the tag of its child."""
        self.symplace = {}
        for ch in node.children:
            syms = [x.strip() for x in ch.arg.split(',')]
            for sym in syms:
                self.symplace[sym] = ch.tag

    def visit_generated_from_gsl(self, node):
        self.gen_generated_from_gsl()

    def visit_header(self, node):
        self.header_nodes.extend(node.children)

    def visit_itemize(self, node):
        self._gen_list('ul', node)

    def visit_link_to_extern(self, node):
        """Emit a link to a name defined in another document.

        The first child names the target document; any remaining children
        form the link text, which defaults to the encoded name.
        """
        name = node.arg
        docname = node.children[0].arg
        children = node.children[1:]
        uri = '%s.html#%s' % (docname, self.encode_link_name(name))
        self.begin('a', 'href=%r' % uri)
        if not children:
            self.append(self.encode(name))
        else:
            for ch in children:
                ch.accept(self)
        self.end('a')

    def visit_link_to_local(self, node):
        """Emit a link to an anchor in the current document."""
        name = node.arg
        uri = '#%s' % self.encode_link_name(name)
        self.begin('a', 'href=%r' % uri)
        if not node.children:
            self.append(self.encode(name))
        else:
            self._visit_children(node)
        self.end('a')

    def visit_link_to_unresolved(self, node):
        """Emit an unresolved link as emphasised text, without an anchor."""
        name = node.arg
        self.begin('em')
        if not node.children:
            self.append(self.encode(name))
        else:
            self._visit_children(node)
        self.end('em')

    def visit_literal_block(self, node):
        self.gen_stdhtml(node, 'pre')

    def visit_man_page_mode(self, node):
        self._visit_children(node)

    def visit_meta(self, node):
        # The original appended to self.document_metas, which is never
        # initialised, so every call raised AttributeError.
        # NOTE(review): header_nodes looks like the intended destination,
        # since it is what gen_document_header renders - confirm.
        self.header_nodes.append(node)

    def visit_spc_colonkind(self, node):
        self.append('<code>:</code> ')

    def visit_spc_mapsto(self, node):
        self.append(' <strong>-></strong> ')

    def visit_string(self, node):
        self._visit_children(node)

    def visit_symbol(self, node):
        self.visit_text(node)

    def visit_text(self, node):
        """Emit encoded text, starting a new line for long or multi-line text."""
        text = self.encode(node.arg)
        if len(text) > 80 or '\n' in text:
            self.append('\n')
        self.append(text)
        self._visit_children(node)

    def visit_to_document_only(self, node):
        self._visit_children(node)

    def visit_to_html_only(self, node):
        self._visit_children(node)

    def visit_to_tester_only(self, node):
        pass

    def visit_valid_html40(self, node):
        """Emit the W3C validator badge; ``src`` may override the image."""
        self.valid_html40 = node
        node, attrs = self.valid_html40.split_attrs(attrdict=True)
        # XXX check allowed attrs but in a GENERAL way
        # Code taken from validator.w3.org
        # NOTE: whitespace inside this literal was reconstructed.
        self.append("""\
<a href="http://validator.w3.org/check?uri=referer"><img
        src="%s"
        alt="Valid HTML 4.0 Strict" height="31" width="88" /></a>
""" % attrs.get('src', 'http://www.w3.org/Icons/valid-html40'))

    def visit_with(self, node):
        pass

    def visit_word(self, node):
        self._visit_children(node)


class _GLUECLAMP_:
    """Glue module definition for the XHTML generator.

    Holds the HTML tag tables, the text and name encoders and the entry
    points that run :class:`Node2XHTML` over a node tree.
    """

    # NOTE(review): 'htmlentitydefs' is the Python 2 module name; under
    # Python 3 the tables live in html.entities. Left unchanged because how
    # '_root' resolves names is not visible here - confirm.
    _imports_ = (
        '_parent:SpecNodes',
        '_parent.SpecNodes:node_of_taci',
        '_parent.Gsml:is_not_ascii',
        '_parent.Main:tgt_prefix',
        '_parent.Main:ReportedError',
        '_root.htmlentitydefs:name2codepoint',
        '_root.htmlentitydefs:codepoint2name',
        '_root:re',
        '_root:time',
    )

    _chgable_ = ('tag_uppercase_name_chars',)

    # Set to make upper-case name characters tagged to make sure
    # no names in a file differ only in case as stated in HTML spec.
    # I believe this doesn't matter in practice in contemporary browsers,
    # since references are also said to be case sensitive!
    # -- I can't be bothered to solve this better now.
    # See also Notes Aug 12 2005.
    tag_uppercase_name_chars = 0

    _html3_2 = (
        'a', 'address', 'area', 'b', 'base', 'big', 'blockquote', 'body',
        'br', 'caption', 'center', 'cite', 'code', 'dfn', 'dt', 'dl', 'dd',
        'div', 'em', 'form', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'hr',
        'html', 'i', 'img', 'input', 'kbd', 'li', 'ol', 'option', 'p',
        'param', 'pre', 'samp', 'select', 'small', 'strong', 'style', 'sub',
        'sup', 'table', 'td', 'textarea', 'th', 'thead', 'title', 'tr',
        'tt', 'ul', 'var')

    # Included in Html 3.2 but 'deprecated' in Html 4.0
    _html4_0_deprecated = (
        'applet', 'basefont', 'dir', 'font', 'isindex', 'strike', 'u',
    )

    # Included in 3.2, not depreciated in 4.0 but one may want to avoid them
    _html_avoid = (
        'script',
    )

    _html4_0 = (
        'abbr', 'acronym', 'bdo', 'button', 'col', 'colgroup', 'del',
        'fieldset', 'frame', 'frameset', 'iframe', 'ins', 'label', 'legend',
        'noframes', 'noscript', 'object', 'optgroup', 'q', 's', 'span',
        'tbody', 'tfoot', 'thead')

    _head_elements = (
        'base', 'isindex', 'link', 'meta', 'script', 'style', 'title'
    )

    # The ones that can have no end tag
    # xxx are there more -style etc- look it up!
    _no_end_tag_elements = (
        # Header elmts
        'meta', 'link',
        # Other
        'img',
        # CAN have end tag? but never has.
        # This will self-close to generate valid XHTML.
        'hr',
    )

    # The ones that we may generate line-break before
    # and hope it will not affect the insertion of spaces in rendering.
    _line_break_allowed = (
        'html', 'head', 'body', 'frameset',
        # Head Elements
    ) + _head_elements + (
        # Generic Block-level Elements
        'address', 'blockquote', 'center', 'del', 'div',
        'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'hr', 'ins', 'isindex',
        'noscript', 'p', 'pre',
        # Lists
        'dir', 'dl', 'dt', 'dd', 'li', 'menu', 'ol', 'ul',
        # Tables
        'table', 'caption', 'colgroup', 'col', 'thead', 'tfoot', 'tbody',
        'tr', 'td', 'th',
        # Forms
        'form', 'button', 'fieldset', 'legend', 'input', 'label',
        'select', 'optgroup', 'option', 'textarea'
    )

    # The attributes allowed in META elements
    meta_attributes = (
        'name', 'http-equiv', 'content', 'scheme', 'lang', 'dir')

    # This returns a function checking if a character is allowed to be used
    # as the first character in a NAME or ID attribute.
    # (I don't think this is the same as .isalpha() with unicode.)
    def _get_is_name_starter_char(self):
        return self.re.compile(r"[A-Za-z]").match

    # This returns a function checking if a character is allowed to be used
    # after the first character in a NAME or ID attribute.
    def _get_is_name_follower_char(self):
        return self.re.compile(r"[A-Za-z0-9\-_:\.]").match

    # A set of the ones we generate directly.
    # This includes the ones from html 3.2 and
    # I have also included the deprecated and the 4.0 only
    def _get_stdhtml(self):
        """Return a dict whose keys are the tags emitted as plain HTML."""
        return dict.fromkeys(
            self._html3_2 + self._html4_0_deprecated + self._html4_0, 1)

    def _get_line_break_allowed(self):
        """Return a dict whose keys are the tags that may start a new line."""
        return dict.fromkeys(self._line_break_allowed, 1)

    def doc2filer(self, doc, node, name, dir, opts, IO):
        """Return a 'write_file' node holding the XHTML for *node*.

        The target path is ``<dir>/<name>.html``, joined with
        ``IO.path.join``.  *opts* is accepted but not used here.
        """
        text = self.doc2text(doc, node)
        path = IO.path.join(dir, '%s.html' % name)
        node = self.node_of_taci('write_file', path, [
            self.node_of_taci('text', text)])
        return node

    def doc2text(self, doc, node):
        """Render *node* to XHTML text, reporting via ``doc.env.error``."""
        d2h = Node2XHTML(self, node, doc.env.error)
        return d2h.get_html()

    def node2file(self, node, file):
        """Render *node* and write the XHTML to the path *file*."""
        text = self.node2text(node)
        # The generated header declares charset=utf-8, so write the bytes in
        # that encoding rather than the locale default.
        with open(file, 'w', encoding='utf-8') as f:
            f.write(text)

    def node2text(self, node):
        """Render *node* to XHTML text with the default error reporter."""
        text = Node2XHTML(self, node).get_html()
        return text

    # Adapted from html4css1.py in docutils
    def encode(self, text):
        """Encode special characters in `text` & return."""
        # @@@ A codec to do these and all other HTML entities would be nice.
        # '&' must be replaced first so the entities added below are not
        # escaped a second time.
        text = text.replace("&", "&amp;")
        text = text.replace("<", "&lt;")
        text = text.replace('"', "&quot;")
        text = text.replace(">", "&gt;")
        text = text.replace("@", "&#64;")  # may thwart some address harvesters
        return text

    # Encode a name according to HTML spec. See also Notes Aug 12 2005.
    # From wdghtml40/values.html#cdata :
    # Attribute values of type ID and NAME must begin with a letter in the
    # range A-Z or a-z and may be followed by letters (A-Za-z), digits
    # (0-9), hyphens ("-"), underscores ("_"), colons (":"), and periods
    # ("."). These values are case-sensitive.
    def encode_name(self, name):
        """Return *name* rewritten to use only valid NAME/ID characters.

        A 'z' is prepended when the first character cannot start a name,
        and 'zz' when the name itself starts with 'z'.  A hyphen becomes
        '--' and any other disallowed character becomes '-<ordinal>-'.
        When ``tag_uppercase_name_chars`` is set, each run of upper-case
        letters is wrapped in hyphens.
        """
        is_name_follower_char = self.is_name_follower_char
        ns = []
        append = ns.append
        upperstate = 0
        ch = name[:1]
        if ch == 'z' or not self.is_name_starter_char(ch):
            append('z')
        if ch == 'z':
            append('z')
        for ch in name:
            if ch == '-' or not is_name_follower_char(ch):
                if upperstate:
                    append('-')
                    upperstate = 0
                append('-')
                if ch != '-':
                    append('%d' % ord(ch))
                append('-')
            elif ch.isupper() and self.tag_uppercase_name_chars:
                if not upperstate:
                    append('-')
                    upperstate = 1
                append(ch)
            else:
                if upperstate:
                    append('-')
                    upperstate = 0
                append(ch)
        if upperstate:
            append('-')
        return ''.join(ns)