#!/usr/bin/env python
#
# The whitespace module.
# Copyright (C) 1999-2000 Rajesh Vaidheeswarran
# All rights reserved.
#
# Redistribution and use are permitted provided that distributions
# retain this entire copyright notice and comment. The name of the
# author may not be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
#
# Available at
#
"""
The whitespace module will scan a given test file and indicate if the
following types of undesirable whitespaces are present.

1. Leading space (empty lines at the top of a file).
2. Trailing space (empty lines at the end of a file).
3. Indentation space (8 or more spaces at the beginning of a line,
   that should be replaced with TABS).

   (since this is the most controversial one, here is the rationale:
   Most terminal drivers and printer drivers have TAB configured or
   even hardcoded to be 8 spaces. (Some of them allow configuration,
   but almost always they default to 8.)

   Changing tab-width to other than 8 and editing will cause your
   code to look different from within emacs, and say, if you cat it
   or more it, or even print it.

   Almost all the popular programming modes let you define an offset
   (like c-basic-offset or perl-indent-level) to configure the
   offset, so you should never have to set your tab-width to be other
   than 8 in all these modes. In fact, with an indent level of say,
   4, 2 TABS will cause emacs to replace your 8 spaces with one \\t
   (try it). If vi users in your office complain, tell them to use
   vim, which distinguishes between tabstop and shiftwidth (vi
   equivalent of our offsets), and also ask them to set smarttab)

   That said, if you still don't want to test for it, then use the
   noindent flag.
4. Spaces followed by a TAB. (Almost always, we never want that).
5. Spaces or TABS at the end of a line.
6. Tabs in string literals. Though not necessarily bogus, most
   occurrences of this are in format or other printable strings where
   they can be replaced by a `\\t'.
"""

import re
import fileinput
import os
import shutil
import sys
import string
from getopt import getopt, GetoptError

__version__ = 2.0
__ident__ = '$Id:$'


def _failure_code(exc):
    """Map an exception to the negative status code stored in `fatal'.

    The first element of exc.args is used when it is a positive integer
    (an errno for I/O errors, 100 for WhitespaceNonTextFileError);
    anything else collapses to -1 so that the result is always < 0.
    """
    code = exc.args[0] if exc.args else None
    if isinstance(code, int) and code > 0:
        return -code
    return -1


#
# WhitespaceException
#
class WhitespaceException(Exception):
    """Base class for all exceptions raised by this module."""


class WhitespaceNonTextFileError(WhitespaceException):
    """The given file is not a text file (per UNIX, which states that
    all lines of a text file are newline terminated)."""

    def __init__(self):
        WhitespaceException.__init__(self, 100, "File not a text file.")
        self.args = (100, "File not a text file.")


#
# The `whitespaceerrors' class
#
class whitespaceerrors:
    """The whitespace errors class.

    This class defines the types of regular expressions used to match
    the different types of whitespace errors. One instance of this is
    used by the whitespace class to keep track of the types and number
    of errors that have been encountered.

    This is an internal class. Users of the whitespace module should
    only instantiate the `whitespace' class.
    """

    # A line consisting only of blanks/tabs (candidate leading/trailing).
    leadtrailre = re.compile("^[ \t]*$")
    # Optional tabs followed by 8 spaces at the start of a line.
    indentspacere = re.compile("^(\t*) {8}")
    # A space immediately followed by a tab.
    spacetabre = re.compile(" \t")
    # A tab that appears after an opening double quote.
    strtabre = re.compile("^[^\"]*\"[^\"]*(\"[^\"]\"[^\"]*)*\t")
    # A blank or tab immediately before the end of the line.
    ateolre = re.compile("[ \t]$")

    def __init__(self, noindent=0):
        """Create an empty error record.

        noindent -- when non-zero, indentation whitespace is never
                    reported by hasindent().
        """
        self.noindent = noindent
        self.leadingerr = 0
        self.trailingerr = 0
        self.indenterr = 0
        self.spacetaberr = 0
        self.strtaberr = 0
        self.ateolerr = 0
        self.leadarray = []
        self.indentarray = []
        self.spacetabarray = []
        self.strtabarray = []
        self.ateolarray = []
        self.trailarray = []

    #
    # Methods to check for whitespace errors.
    #
    def hasleadtrail(self, line):
        "Does the given line have a leading/trailing whitespace match?"
        return self.leadtrailre.match(line)

    def hasindent(self, line):
        "Does the given line have an indentation whitespace match?"
        if self.noindent == 0:
            return self.indentspacere.match(line)
        return 0

    def hasspacetab(self, line):
        "Does the given line have space followed by a tab?"
        return self.spacetabre.search(line)

    def hasstrtab(self, line):
        "Does the given line have tab inside a literal string?"
        return self.strtabre.match(line)

    def hasateol(self, line):
        "Does the given line have a space or a tab at the end-of-line?"
        return self.ateolre.search(line)

    #
    # Methods to return an array of lines with whitespace errors.
    #
    def leading(self):
        "List of all lines with leading whitespace"
        return self.leadarray

    def indent(self):
        "List of all lines with indentation whitespace"
        return self.indentarray

    def spacetab(self):
        "List of all lines containing spaces followed by tabs"
        return self.spacetabarray

    def strtab(self):
        "List of all lines containing tabs in literal strings"
        return self.strtabarray

    def ateol(self):
        "List of all lines with spaces/tabs at the end-of-line"
        return self.ateolarray

    def trailing(self):
        "List of all lines with trailing whitespace"
        return self.trailarray

    #
    # Methods to append an offending line to an array.
    #
    def append(self, lt, lineno):
        "Add the given line number to the given list (None is ignored)"
        if lineno is not None:
            lt.append(lineno)

    def setleading(self, val=1, line=None):
        """Turn on(/off) leading space flag, and note down the given
        line number, if any."""
        self.leadingerr = val
        self.append(self.leadarray, line)

    def setindent(self, val=1, line=None):
        """Turn on(/off) indentation space flag, and note down the
        given line number, if any."""
        self.indenterr = val
        self.append(self.indentarray, line)

    def setspacetab(self, val=1, line=None):
        """Turn on(/off) space followed by tab flag, and note down the
        given line number, if any."""
        self.spacetaberr = val
        self.append(self.spacetabarray, line)

    def setstrtab(self, val=1, line=None):
        """Turn on(/off) tab in literal string flag, and note down the
        given line number, if any."""
        self.strtaberr = val
        self.append(self.strtabarray, line)

    def setateol(self, val=1, line=None):
        """Turn on(/off) whitespace at end-of-line flag, and note down
        the given line number, if any."""
        self.ateolerr = val
        self.append(self.ateolarray, line)

    def settrailing(self, val=1, line=None):
        """Turn on(/off) trailing space flag, and note down the given
        line number, if any."""
        self.trailingerr = val
        self.append(self.trailarray, line)

    #
    # Methods to know if a type of error has been recorded.
    #
    def getleading(self):
        "Return 1 if leading space error encountered"
        return self.leadingerr

    def getindent(self):
        "Return 1 if indentation space error encountered"
        return self.indenterr

    def getspacetab(self):
        "Return 1 if space followed by tab error encountered"
        return self.spacetaberr

    def getstrtab(self):
        "Return 1 if tabs in literal strings encountered"
        return self.strtaberr

    def getateol(self):
        "Return 1 if space at end-of-line encountered"
        return self.ateolerr

    def gettrailing(self):
        "Return 1 if trailing space error encountered"
        return self.trailingerr


#
# The 'whitespace' class
#
class whitespace:
    """The `whitespace' class

    This class checks the given file for the whitespace errors as
    defined by the whitespaceerrors class. Construction can be done by
    passing in a filename (mandatory) to check for whitespace errors.

    Exported API:

    error() - returns one of:
         < 0: A failure. Exception can be gathered from exc().
         = 0: No whitespace errors.
         > 0: the number of distinct whitespace errors types detected
              in the given file.

    errorlist() - Returns a dictionary of whitespaces error types, and
         the lines they were detected in.

    exc() - Returns the exception if error() returned a negative
         integer.
    """

    dirty = 0
    errors = ''
    excep = ''
    fatal = 0
    filename = ''
    noindent = 0

    def __init__(self, filename, noindent=0):
        """Construct a whitespace object for a given file and scan it.

        filename -- path of the file to check.
        noindent -- when non-zero, skip the indentation check.
        """
        self.noindent = noindent
        self.filename = filename
        self.errors = whitespaceerrors(noindent)
        self.dirty = 0
        self.fatal = 0
        self.check()

    def error(self):
        """Whitespace errors that have been detected in the file

        < 0: A failure. Exception can be gathered from exc()
        = 0: No whitespace errors
        > 0: the number of distinct whitespace errors types detected
             in the given file"""
        if self.fatal < 0:
            return self.fatal
        flags = (self.errors.getleading(),
                 self.errors.getindent(),
                 self.errors.getspacetab(),
                 self.errors.getstrtab(),
                 self.errors.getateol(),
                 self.errors.gettrailing())
        self.dirty = len([flag for flag in flags if flag])
        return self.dirty

    def errorlist(self):
        """A dictionary of the types of whitespace errors and the list
        of lines they have occurred in. Empty when the file is clean or
        could not be scanned."""
        retv = {}
        if self.error() <= 0:
            return retv
        if self.errors.getleading():
            retv["Leading whitespace"] = self.errors.leading()
        if self.errors.getindent():
            retv["Indentation whitespace"] = self.errors.indent()
        if self.errors.getspacetab():
            retv["Space followed by Tab"] = self.errors.spacetab()
        if self.errors.getstrtab():
            retv["Tab in string literal"] = self.errors.strtab()
        if self.errors.getateol():
            retv["End-of-line whitespace"] = self.errors.ateol()
        if self.errors.gettrailing():
            retv["Trailing whitespace"] = self.errors.trailing()
        return retv

    def check(self):
        """Check the given file for the different types of whitespace
        errors and returns one of:

        < 0: A failure. Exception can be gathered from exc()
        = 0: No whitespace errors
        > 0: the number of distinct whitespace errors types detected
             in the given file (also obtainable from error())"""
        textfilere = re.compile("^.*\n$")
        # A private FileInput instance (rather than the module-global
        # fileinput state) keeps concurrent/nested checks independent
        # and lets `finally' release the file on every exit path.
        stream = fileinput.FileInput(self.filename)
        try:
            lineno = 0
            for line in stream:
                lineno = stream.filelineno()
                if not textfilere.match(line):
                    raise WhitespaceNonTextFileError()
                if self.errors.hasleadtrail(line):
                    if stream.isfirstline():
                        self.errors.setleading(line=lineno)
                    else:
                        # Tentatively trailing; cleared again as soon
                        # as a later non-blank line is seen.
                        self.errors.settrailing()
                else:
                    self.errors.settrailing(0)
                if self.errors.hasindent(line):
                    self.errors.setindent(line=lineno)
                if self.errors.hasspacetab(line):
                    self.errors.setspacetab(line=lineno)
                if self.errors.hasstrtab(line):
                    self.errors.setstrtab(line=lineno)
                if self.errors.hasateol(line):
                    self.errors.setateol(line=lineno)
            # Still flagged at end of file: record the last line number.
            if self.errors.gettrailing():
                self.errors.settrailing(line=lineno)
            return self.error()
        except (IOError, WhitespaceNonTextFileError) as e:
            self.excep = e
            self.fatal = _failure_code(e)
            return self.fatal
        except UnicodeDecodeError:
            # Content that cannot be decoded as text is reported as a
            # non-text file instead of escaping as an unrelated error.
            self.excep = WhitespaceNonTextFileError()
            self.fatal = _failure_code(self.excep)
            return self.fatal
        finally:
            stream.close()

    def exc(self):
        "The exception that last occurred"
        return self.excep

    def leadtrailclean(self, lt, which):
        """Return a copy of the list of lines `lt' with the blank lines
        at one end dropped.

        which -- "trail" strips blank lines from the end of the list;
                 any other value strips them from the beginning.

        The list passed in is left untouched.
        """
        lines = list(lt)
        from_end = (which == "trail")
        if from_end:
            lines.reverse()
        kept = []
        skipping = True
        for line in lines:
            if skipping and self.errors.hasleadtrail(line):
                continue
            skipping = False
            kept.append(line)
        if from_end:
            kept.reverse()
        return kept

    def regclean(self, rematch, reg, nreg, line):
        """Repeatedly substitute `nreg' for matches of `reg' in `line'
        for as long as the predicate `rematch' accepts the line, and
        return the result."""
        while rematch(line):
            line = reg.sub(nreg, line)
        return line

    def cleanup(self, file=None):
        """Cleanup the current file, if whitespace unclean.

        Given a filename, write to the file. The same name as the
        original file will cause an inplace replacement. If no file
        name is given, then a .clean file is written.

        We don't attempt to fix the tab-in-literal-string whitespace
        alone, since it is language dependent, and assuming a `\\t'
        would also assume how tab is represented in various languages.

        Returns 0 on success (or when there is nothing to do) and a
        negative code on failure; the exception is then available from
        exc()."""
        if self.fatal:
            return self.fatal
        if self.error() == 0:
            return 0
        if file is None:
            file = self.filename + ".clean"
        tmpfile = self.filename + "." + str(os.getpid())
        try:
            with open(self.filename, "r") as inf:
                inputlines = inf.readlines()
            olist = []
            for line in inputlines:
                # Fix indentation whitespace
                line = self.regclean(self.errors.hasindent,
                                     self.errors.indentspacere,
                                     r"\1\t", line)
                # Fix EOL whitespace
                line = self.regclean(self.errors.hasateol,
                                     self.errors.ateolre, "", line)
                # Fix Space-followed-by-Tab whitespace
                line = self.regclean(self.errors.hasspacetab,
                                     self.errors.spacetabre, "\t", line)
                olist.append(line)
            # Fix Leading space
            olist = self.leadtrailclean(olist, "lead")
            # Fix Trailing space
            olist = self.leadtrailclean(olist, "trail")
            try:
                with open(tmpfile, "w") as outf:
                    outf.writelines(olist)
                # Carry the original permissions over to the result.
                shutil.copymode(self.filename, tmpfile)
                shutil.copy(tmpfile, file)
            finally:
                # Never leave the scratch file behind, even on failure.
                if os.path.exists(tmpfile):
                    os.unlink(tmpfile)
            return 0
        except IOError as e:
            self.excep = e
            self.fatal = _failure_code(e)
            return self.fatal


class whitespaceprogram:
    """The program itself"""

    name = sys.argv[0].split('/')[-1]
    ret = 0
    flags = '?hcvVI'
    verbose = 0
    cleanup = 0
    noindent = 0

    def __init__(self):
        "Parse the command line and process every file named on it"
        self.run()

    def errlist(self, file, dict):
        """Print one line per error type found in `file'; in verbose
        mode the offending line numbers are printed as well."""
        for err, lines in dict.items():
            if self.verbose:
                print("%s: %s %s" % (file, err, lines))
            else:
                print("%s: %s" % (file, err))

    def usage(self):
        "Print the usage message and exit with status -1"
        print("%s: No input files" % self.name)
        print("Usage: %s [-V] [ -c | -v | -h | -I | -? ] file ..." % self.name)
        print("-V\t version information")
        print("-c\t cleanup")
        print("-v\t verbose")
        print("-I\t No indentation whitespace check")
        print("-h/-?\t this message")
        sys.exit(-1)

    def run(self):
        """Process the command line, check (and optionally clean) each
        file, then exit with the total number of error types found."""
        try:
            optlist, args = getopt(sys.argv[1:], self.flags)
        except GetoptError:
            # Only option-parsing failures lead to the usage message;
            # SystemExit/KeyboardInterrupt are no longer swallowed.
            self.usage()
        for (flag, arg) in optlist:
            if flag == '-V':
                print("%s v%2.1f" % (self.name, __version__))
                print("Copyright (C) 1999-2000 Rajesh Vaidheeswarran")
                print("All rights reserved.")
                print("")
            elif flag == '-?' or flag == '-h':
                self.usage()
            elif flag == '-v':
                self.verbose = 1
            elif flag == '-c':
                self.cleanup = 1
            elif flag == '-I':
                self.noindent = 1
        if len(args) == 0:
            self.usage()
        for path in args:
            thisfile = whitespace(path, self.noindent)
            cv = thisfile.error()
            if cv < 0:
                failure = thisfile.exc()
                # args[1] is the human readable message for both I/O
                # errors and WhitespaceNonTextFileError.
                if len(failure.args) > 1:
                    detail = failure.args[1]
                else:
                    detail = failure
                print("%s: %s" % (path, detail))
            else:
                self.ret = self.ret + cv
            if cv:
                errs = thisfile.errorlist()
                if self.cleanup:
                    if self.verbose:
                        self.errlist(path, errs)
                    retv = thisfile.cleanup(path)
                    if retv != 0:
                        print("%s returned %d" % (path, retv))
                else:
                    self.errlist(path, errs)
        sys.exit(self.ret)


if __name__ == '__main__':
    whitespaceprogram()