From 71c24608cd9d6796268c6fed52aa3ad4dfd2223d Mon Sep 17 00:00:00 2001 From: Crozzers Date: Sun, 26 Apr 2026 13:36:16 +0100 Subject: [PATCH] Remove all deprecated usages of `codecs.open` and fix many unclosed file warnings --- Makefile | 4 ++ lib/markdown2.py | 11 ++-- perf/gen_perf_cases.py | 31 +++++------ perf/strip_cookbook_data.py | 3 +- test/markdown.py | 94 +++++++++++++++++----------------- test/test_markdown2.py | 25 +++++---- tools/cutarelease.py | 27 +++++----- tools/tables-align-columns.py | 4 +- tools/wiki-tables-to-tables.py | 4 +- 9 files changed, 102 insertions(+), 101 deletions(-) diff --git a/Makefile b/Makefile index d2a9a72f..90a4fa0a 100644 --- a/Makefile +++ b/Makefile @@ -12,6 +12,10 @@ test: testone: cd test && python test.py -- -knownfailure +.PHONY: testwarn +testwarn: + cd test && python -Wd test.py -- -knownfailure + .PHONY: testredos testredos: python test/test_redos.py diff --git a/lib/markdown2.py b/lib/markdown2.py index ea08820f..d3787250 100755 --- a/lib/markdown2.py +++ b/lib/markdown2.py @@ -114,7 +114,6 @@ __author__ = "Trent Mick" import argparse -import codecs import logging import re import sys @@ -179,9 +178,8 @@ def markdown_path( footnote_return_symbol: Optional[str] = None, use_file_vars: bool = False ) -> 'UnicodeWithAttrs': - fp = codecs.open(path, 'r', encoding) - text = fp.read() - fp.close() + with open(path, 'r', encoding=encoding) as f: + text = f.read() return Markdown(html4tags=html4tags, tab_width=tab_width, safe_mode=safe_mode, extras=extras, link_patterns=link_patterns, @@ -4766,9 +4764,8 @@ def main(argv=None): if path == '-': text = sys.stdin.read() else: - fp = codecs.open(path, 'r', opts.encoding) - text = fp.read() - fp.close() + with open(path, 'r', encoding=opts.encoding) as f: + text = f.read() if opts.compare: from subprocess import PIPE, Popen print("==== Markdown.pl ====") diff --git a/perf/gen_perf_cases.py b/perf/gen_perf_cases.py index 8a45f3e4..33336c86 100755 --- a/perf/gen_perf_cases.py +++ 
b/perf/gen_perf_cases.py @@ -7,7 +7,6 @@ from glob import glob import operator import shutil -import codecs TMP = "tmp-" @@ -16,16 +15,16 @@ def gen_aspn_cases(limit=0): base_dir = TMP+'aspn-cases' if exists(base_dir): print("'%s' exists, skipping" % base_dir) - return + return os.makedirs(base_dir) sys.stdout.write("generate %s" % base_dir); sys.stdout.flush() recipes_path = expanduser("~/as/code.as.com/db/aspn/recipes.pprint") - recipe_dicts = eval(open(recipes_path).read()) + with open(recipes_path) as f: + recipe_dicts = eval(f.read()) for i, r in enumerate(recipe_dicts): sys.stdout.write('.'); sys.stdout.flush() - f = codecs.open(join(base_dir, "r%04d.text" % i), "w", "utf-8") - f.write(r["desc"]) - f.close() + with open(join(base_dir, "r%04d.text" % i), "w", encoding="utf-8") as f: + f.write(r["desc"]) for j, c in enumerate(sorted(r["comments"], key=operator.itemgetter("pub_date"))): @@ -36,10 +35,8 @@ def gen_aspn_cases(limit=0): headline += '.' headline = _markdown_from_aspn_html(headline).strip() text = "**" + headline + "** " + text - f = codecs.open(join(base_dir, "r%04dc%02d.text" % (i, j)), - 'w', "utf-8") - f.write(text) - f.close() + with open(join(base_dir, "r%04dc%02d.text" % (i, j)), 'w', encoding="utf-8") as f: + f.write(text) if limit and i >= limit: break @@ -49,7 +46,7 @@ def gen_test_cases(): base_dir = TMP+"test-cases" if exists(base_dir): print("'%s' exists, skipping" % base_dir) - return + return os.makedirs(base_dir) print("generate %s" % base_dir) for test_cases_dir in glob(join("..", "test", "*-cases")): @@ -106,10 +103,10 @@ def _markdown_from_aspn_html(html): if title is None: replacement = '[{}]({})'.format(content, escaped_href) else: - replacement = '[{}]({} "{}")'.format(content, escaped_href, + replacement = '[{}]({} "{}")'.format(content, escaped_href, title.replace('"', "'")) markdown = markdown[:start] + replacement + markdown[end:] - + markdown = markdown.replace(" ", ' ') #
 part 1: Pull out 
-blocks and put in placeholders
@@ -179,18 +176,18 @@ def _markdown_from_aspn_html(html):
 # Recipe: dedent (0.1.2)
 def _dedentlines(lines, tabsize=8, skip_first_line=False):
     """_dedentlines(lines, tabsize=8, skip_first_line=False) -> dedented lines
-    
+
         "lines" is a list of lines to dedent.
         "tabsize" is the tab width to use for indent width calculations.
         "skip_first_line" is a boolean indicating if the first line should
             be skipped for calculating the indent width and for dedenting.
             This is sometimes useful for docstrings and similar.
-    
+
     Same as dedent() except operates on a sequence of lines. Note: the
     lines list is modified **in-place**.
     """
     DEBUG = False
-    if DEBUG: 
+    if DEBUG:
         print("dedent: dedent(..., tabsize=%d, skip_first_line=%r)"\
               % (tabsize, skip_first_line))
     indents = []
@@ -255,7 +252,7 @@ def _dedent(text, tabsize=8, skip_first_line=False):
         "skip_first_line" is a boolean indicating if the first line should
             be skipped for calculating the indent width and for dedenting.
             This is sometimes useful for docstrings and similar.
-    
+
     textwrap.dedent(s), but don't expand tabs to spaces
     """
     lines = text.splitlines(1)
diff --git a/perf/strip_cookbook_data.py b/perf/strip_cookbook_data.py
index d92597b8..94ba3048 100644
--- a/perf/strip_cookbook_data.py
+++ b/perf/strip_cookbook_data.py
@@ -3,7 +3,8 @@
 
 def doit():
     recipes_path = expanduser("recipes.pprint")
-    recipe_dicts = eval(open(recipes_path).read())
+    with open(recipes_path) as f:
+        recipe_dicts = eval(f.read())
     for r in recipe_dicts:
         for key in r.keys():
             if key not in ('desc', 'comments'):
diff --git a/test/markdown.py b/test/markdown.py
index b2ef85da..0e99f6d7 100644
--- a/test/markdown.py
+++ b/test/markdown.py
@@ -29,7 +29,8 @@
 """
 
 
-import re, sys, codecs
+from pathlib import Path
+import re, sys
 
 # Set debug level: 3 none, 2 critical, 1 informative, 0 all
 (VERBOSE, INFO, CRITICAL, NONE) = list(range(4))
@@ -81,7 +82,7 @@ def removeBOM(text, encoding):
 # and uses the actual name of the executable called.)
 
 EXECUTABLE_NAME_FOR_USAGE = "python markdown.py"
-                    
+
 
 # --------------- CONSTANTS YOU _SHOULD NOT_ HAVE TO CHANGE ----------
 
@@ -306,7 +307,7 @@ def toxml(self):
             childBuffer += "/>"
 
 
-            
+
         buffer += "<" + self.nodeName
 
         if self.nodeName in ['p', 'li', 'ul', 'ol',
@@ -317,10 +318,10 @@ def toxml(self):
                     bidi = self.bidi
                 else :
                     bidi = self.doc.bidi
-                    
+
                 if bidi=="rtl" :
                     self.setAttribute("dir", "rtl")
-        
+
         for attr in self.attributes :
             value = self.attribute_values[attr]
             value = self.doc.normalizeEntities(value,
@@ -345,7 +346,7 @@ class TextNode :
     attrRegExp = re.compile(r'\{@([^\}]*)=([^\}]*)}') # {@id=123}
 
     def __init__ (self, text) :
-        self.value = text        
+        self.value = text
 
     def attributeCallback(self, match) :
 
@@ -359,7 +360,7 @@ def toxml(self) :
         text = self.value
 
         self.parent.setBidi(getBidiType(text))
-        
+
         if not text.startswith(HTML_PLACEHOLDER_PREFIX):
             if self.parent.nodeName == "p" :
                 text = text.replace("\n", "\n   ")
@@ -479,7 +480,7 @@ def run (self, lines) :
 
 class HtmlBlockPreprocessor (Preprocessor):
     """Removes html blocks from self.lines"""
-    
+
     def _get_left_tag(self, block):
         return block[1:].replace(">", " ", 1).split()[0].lower()
 
@@ -488,7 +489,7 @@ def _get_right_tag(self, left_tag, block):
         return block.rstrip()[-len(left_tag)-2:-1].lower()
 
     def _equal_tags(self, left_tag, right_tag):
-        
+
         if left_tag in ['?', '?php', 'div'] : # handle PHP, etc.
             return True
         if ("/" + left_tag) == right_tag:
@@ -504,18 +505,18 @@ def _equal_tags(self, left_tag, right_tag):
     def _is_oneliner(self, tag):
         return (tag in ['hr', 'hr/'])
 
-    
+
     def run (self, lines) :
 
         new_blocks = []
         text = "\n".join(lines)
         text = text.split("\n\n")
-        
+
         items = []
         left_tag = ''
         right_tag = ''
         in_tag = False # flag
-        
+
         for block in text:
             if block.startswith("\n") :
                 block = block[1:]
@@ -523,7 +524,7 @@ def run (self, lines) :
             if not in_tag:
 
                 if block.startswith("<"):
-                    
+
                     left_tag = self._get_left_tag(block)
                     right_tag = self._get_right_tag(left_tag, block)
 
@@ -535,13 +536,13 @@ def run (self, lines) :
                     if self._is_oneliner(left_tag):
                         new_blocks.append(block.strip())
                         continue
-                        
+
                     if block[1] == "!":
                         # is a comment block
                         left_tag = "--"
                         right_tag = self._get_right_tag(left_tag, block)
                         # keep checking conditions below and maybe just append
-                        
+
                     if block.rstrip().endswith(">") \
                         and self._equal_tags(left_tag, right_tag):
                         new_blocks.append(
@@ -557,9 +558,9 @@ def run (self, lines) :
 
             else:
                 items.append(block.strip())
-                
+
                 right_tag = self._get_right_tag(left_tag, block)
-                
+
                 if self._equal_tags(left_tag, right_tag):
                     # if find closing tag
                     in_tag = False
@@ -570,7 +571,7 @@ def run (self, lines) :
         if items :
             new_blocks.append(self.stash.store('\n\n'.join(items)))
             new_blocks.append('\n')
-            
+
         return "\n\n".join(new_blocks).split("\n")
 
 HTML_BLOCK_PREPROCESSOR = HtmlBlockPreprocessor()
@@ -1076,10 +1077,10 @@ def __init__(self, source=None,  # deprecated
                                  # inserted later
 
         self.textPostprocessors = [] # a footnote postprocessor will get
-                                     # inserted later                                 
+                                     # inserted later
 
         self.prePatterns = []
-        
+
 
         self.inlinePatterns = [ DOUBLE_BACKTICK_PATTERN,
                                 BACKTICK_PATTERN,
@@ -1131,7 +1132,7 @@ def registerExtensions(self, extensions, configs) :
                     configs_for_ext = configs[ext]
                 else :
                     configs_for_ext = []
-                extension = module.makeExtension(configs_for_ext)    
+                extension = module.makeExtension(configs_for_ext)
                 extension.extendMarkdown(self, globals())
 
 
@@ -1197,7 +1198,7 @@ def _transform(self):
             else :
                 buffer.append(line)
         self._processSection(self.top_element, buffer)
-        
+
         #self._processSection(self.top_element, self.lines)
 
         # Not sure why I put this in but let's leave it for now.
@@ -1362,7 +1363,7 @@ def _processList(self, parent_elem, lines, inList, listexpr, tag) :
 
                 # Check if the next non-blank line is still a part of the list
                 if ( RE.regExp['ul'].match(next) or
-                     RE.regExp['ol'].match(next) or 
+                     RE.regExp['ol'].match(next) or
                      RE.regExp['tabbed'].match(next) ):
                     # get rid of any white space in the line
                     items[item].append(line.strip())
@@ -1486,7 +1487,7 @@ def _handleInlineWrapper (self, line) :
             i = 0
 
             while i < len(parts) :
-                
+
                 x = parts[i]
 
                 if isinstance(x, str) :
@@ -1506,7 +1507,7 @@ def _handleInlineWrapper (self, line) :
                 parts[i] = self.doc.createTextNode(x)
 
         return parts
-        
+
 
     def _handleInline(self,  line):
         """Transform a Markdown line with inline elements to an XHTML
@@ -1532,7 +1533,7 @@ def _applyPattern(self, line, pattern) :
         """ Given a pattern name, this function checks if the line
         fits the pattern, creates the necessary elements, and returns
         back a list consisting of NanoDom elements and/or strings.
-        
+
         @param line: the text to be processed
         @param pattern: the pattern to be checked
 
@@ -1560,19 +1561,19 @@ def _applyPattern(self, line, pattern) :
             if not node.nodeName in ["code", "pre"] :
                 for child in node.childNodes :
                     if isinstance(child, TextNode):
-                        
+
                         result = self._handleInlineWrapper(child.value)
-                        
+
                         if result:
 
                             if result == [child] :
                                 continue
-                                
+
                             result.reverse()
                             #to make insertion easier
 
                             position = node.childNodes.index(child)
-                            
+
                             node.removeChild(child)
 
                             for item in result:
@@ -1583,7 +1584,7 @@ def _applyPattern(self, line, pattern) :
                                              self.doc.createTextNode(item))
                                 else:
                                     node.insertChild(position, item)
-                
+
 
 
 
@@ -1610,7 +1611,7 @@ def convert (self, source = None):
 
         self.source = removeBOM(self.source, self.encoding)
 
-        
+
         doc = self._transform()
         xml = doc.toxml()
 
@@ -1623,7 +1624,7 @@ def convert (self, source = None):
             html = self.htmlStash.rawHtmlBlocks[i]
             if self.safeMode :
                 html = HTML_REMOVED_TEXT
-                
+
             xml = xml.replace("

%s\n

" % (HTML_PLACEHOLDER % i), html + "\n") xml = xml.replace(HTML_PLACEHOLDER % i, @@ -1642,7 +1643,7 @@ def convert (self, source = None): __str__ = convert # deprecated - will be changed in 1.7 to report # information about the MD instance - + toString = __str__ # toString() method is deprecated @@ -1675,16 +1676,15 @@ def markdownFromFile(input = None, if not encoding : encoding = "utf-8" - input_file = codecs.open(input, mode="r", encoding=encoding) - text = input_file.read() - input_file.close() + assert isinstance(input, (str, Path)), 'input path required' + with open(input, mode="r", encoding=encoding) as f: + text = f.read() new_text = markdown(text, extensions, encoding, safe_mode = safe) if output : - output_file = codecs.open(output, "w", encoding=encoding) - output_file.write(new_text) - output_file.close() + with open(output, "w", encoding=encoding) as f: + f.write(new_text) else : sys.stdout.write(new_text.encode(encoding)) @@ -1693,14 +1693,14 @@ def markdown(text, extensions = [], encoding = None, safe_mode = False) : - + message(VERBOSE, "in markdown.markdown(), received text:\n%s" % text) extension_names = [] extension_configs = {} - + for ext in extensions : - pos = ext.find("(") + pos = ext.find("(") if pos == -1 : extension_names.append(ext) else : @@ -1715,7 +1715,7 @@ def markdown(text, safe_mode = safe_mode) return md.convert(text) - + class Extension : @@ -1740,7 +1740,7 @@ def setConfig(self, key, value) : For lower versions of Python use: %s INPUT_FILE > OUTPUT_FILE - + """ % EXECUTABLE_NAME_FOR_USAGE def parse_options() : @@ -1776,7 +1776,7 @@ def parse_options() : parser.add_option("-s", "--safe", action="store_const", const=True, dest="safe", help="same mode (strip user's HTML tag)") - + parser.add_option("--noisy", action="store_const", const=VERBOSE, dest="verbose", help="print debug messages") @@ -1810,7 +1810,7 @@ def parse_options() : if not options : sys.exit(0) - + markdownFromFile(**options) diff --git a/test/test_markdown2.py 
b/test/test_markdown2.py index 06313af9..fb433efc 100755 --- a/test/test_markdown2.py +++ b/test/test_markdown2.py @@ -51,16 +51,19 @@ def _assertMarkdownParity(self, text): def _assertMarkdownPath(self, text_path, encoding="utf-8", opts=None, toc_html_path=None, metadata_path=None): - text = codecs.open(text_path, 'r', encoding=encoding).read() + with open(text_path, 'r', encoding=encoding) as f: + text = f.read() html_path = splitext(text_path)[0] + ".html" - html = codecs.open(html_path, 'r', encoding=encoding).read() + with open(html_path, 'r', encoding=encoding) as f: + html = f.read() extra = {} if toc_html_path: - extra["toc_html"] = codecs.open(toc_html_path, 'r', encoding=encoding).read() + with open(toc_html_path, 'r', encoding=encoding) as f: + extra["toc_html"] = f.read() extra["toc_html_path"] = toc_html_path if metadata_path: - extra["metadata"] = json_loads( - codecs.open(metadata_path, 'r', encoding=encoding).read()) + with open(metadata_path, 'r', encoding=encoding) as f: + extra["metadata"] = json_loads(f.read()) extra["metadata_path"] = metadata_path self._assertMarkdown(text, html, text_path, html_path, opts=opts, **extra) @@ -152,7 +155,8 @@ def generate_tests(cls): if exists(opts_path): try: with warnings.catch_warnings(record=True) as caught_warnings: - opts = eval(open(opts_path).read()) + with open(opts_path) as f: + opts = eval(f.read()) for warning in caught_warnings: print("WARNING: loading %s generated warning: %s - lineno %d" % (opts_path, warning.message, warning.lineno), file=sys.stderr) except Exception: @@ -175,10 +179,11 @@ def generate_tests(cls): tags_path = splitext(text_path)[0] + ".tags" if exists(tags_path): tags = [] - for line in open(tags_path): - if '#' in line: # allow comments in .tags files - line = line[:line.index('#')] - tags += line.split() + with open(tags_path) as f: + for line in f: + if '#' in line: # allow comments in .tags files + line = line[:line.index('#')] + tags += line.split() test_func.tags = tags 
name = splitext(basename(text_path))[0] diff --git a/tools/cutarelease.py b/tools/cutarelease.py index 54201e22..2c4e0f5a 100755 --- a/tools/cutarelease.py +++ b/tools/cutarelease.py @@ -19,7 +19,6 @@ import os from os.path import exists, basename, splitext import re -import codecs import logging import optparse import json @@ -146,9 +145,8 @@ def cutarelease(project_name, version_files, dry_run=False): changes_txt = changes_txt.replace(" (not yet released)", "", 1) if not dry_run and changes_txt != changes_txt_before: log.info("prepare `%s' for release", changes_path) - f = codecs.open(changes_path, 'w', 'utf-8') - f.write(changes_txt) - f.close() + with open(changes_path, 'w', encoding='utf-8') as f: + f.write(changes_txt) run('git commit %s -m "prepare for %s release"' % (changes_path, version)) @@ -196,14 +194,14 @@ def cutarelease(project_name, version_files, dry_run=False): changes_txt = changes_txt.replace(marker + '\n', "{}\n\n(nothing yet)\n\n\n{}\n".format(next_verline, marker)) if not dry_run: - f = codecs.open(changes_path, 'w', 'utf-8') - f.write(changes_txt) - f.close() + with open(changes_path, 'w', encoding='utf-8') as f: + f.write(changes_txt) # - update version file next_version_tuple = _tuple_from_version(next_version) for i, ver_file in enumerate(version_files): - ver_content = codecs.open(ver_file, 'r', 'utf-8').read() + with open(ver_file, 'r', encoding='utf-8') as f: + ver_content = f.read() ver_file_type, ver_info = parsed_version_files[i] if ver_file_type == "json": marker = '"version": "%s"' % version @@ -232,9 +230,8 @@ def cutarelease(project_name, version_files, dry_run=False): raise Error("unknown ver_file_type: %r" % ver_file_type) if not dry_run: log.info("update version to '%s' in '%s'", next_version, ver_file) - f = codecs.open(ver_file, 'w', 'utf-8') - f.write(ver_content) - f.close() + with open(ver_file, 'w', encoding='utf-8') as f: + f.write(ver_content) if not dry_run: run('git commit {} {} -m "prep for future dev"'.format( @@ 
-319,9 +316,8 @@ def _parse_version_file(version_file): if version_file_type in aliases: version_file_type = aliases[version_file_type] - f = codecs.open(version_file, 'r', 'utf-8') - content = f.read() - f.close() + with open(version_file, 'r', encoding='utf-8') as f: + content = f.read() if not version_file_type: # Guess the type. @@ -410,7 +406,8 @@ def parse_changelog(changes_path): """ if not exists(changes_path): raise Error("changelog file '%s' not found" % changes_path) - content = codecs.open(changes_path, 'r', 'utf-8').read() + with open(changes_path, 'r', encoding='utf-8') as f: + content = f.read() parser = re.compile( r'^##\s*(?P[^\n]*?)\s*$(?P.*?)(?=^##|\Z)', diff --git a/tools/tables-align-columns.py b/tools/tables-align-columns.py index f3535928..89b51cd0 100755 --- a/tools/tables-align-columns.py +++ b/tools/tables-align-columns.py @@ -12,7 +12,6 @@ __version__ = "1.0.0" -import codecs import re import sys from collections import defaultdict @@ -100,7 +99,8 @@ def _table_sub(match): table_str = '\n'.join(('| ' + ' | '.join(r) + ' |') for r in table) return table_str + '\n' - text = codecs.open(path, 'rb', 'utf8').read() + with open(path, 'r', encoding='utf8') as f: + text = f.read() less_than_tab = 3 table_re = re.compile(r''' diff --git a/tools/wiki-tables-to-tables.py b/tools/wiki-tables-to-tables.py index 8c3b3fb0..6032aee1 100755 --- a/tools/wiki-tables-to-tables.py +++ b/tools/wiki-tables-to-tables.py @@ -33,7 +33,6 @@ __version__ = "1.0.0" -import codecs import re import sys @@ -77,7 +76,8 @@ def _wiki_table_sub(match): table_str = '\n'.join(('| ' + ' | '.join(r) + ' |') for r in table) return table_str + '\n' - text = codecs.open(path, 'rb', 'utf8').read() + with open(path, 'r', encoding='utf8') as f: + text = f.read() # If there is a leading markdown2 metadata block with; # markdown2extras: ..., wiki-tables, ...