#!/usr/bin/env python
#
# The whitespace module.
# Copyright (C) 1999-2000 Rajesh Vaidheeswarran
# All rights reserved.
#
# Redistribution and use are permitted provided that distributions
# retain this entire copyright notice and comment. The name of the
# author may not be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
#
# Available at
#
"""
The whitespace module will scan a given text file and indicate if the
following types of undesirable whitespace are present.
1. Leading space (empty lines at the top of a file).
2. Trailing space (empty lines at the end of a file).
3. Indentation space (8 or more spaces at the beginning of a line, that
should be replaced with TABS).
(since this is the most controversial one, here is the rationale: Most
terminal drivers and printer drivers have TAB configured or even
hardcoded to be 8 spaces. (Some of them allow configuration, but almost
always they default to 8.)
Changing tab-width to other than 8 and editing will cause your code to
look different from within emacs, and say, if you cat it or more it, or
even print it.
Almost all the popular programming modes let you define an offset (like
c-basic-offset or perl-indent-level) to configure the offset, so you
should never have to set your tab-width to be other than 8 in all these
modes. In fact, with an indent level of say, 4, 2 TABS will cause emacs
to replace your 8 spaces with one \t (try it). If vi users in your
office complain, tell them to use vim, which distinguishes between
tabstop and shiftwidth (vi equivalent of our offsets), and also ask them
to set smarttab)
That said, if you still don't want to test for it, then use the noindent
flag.
4. Spaces followed by a TAB. (Almost always, we never want that).
5. Spaces or TABS at the end of a line.
6. Tabs in string literals. Though not necessarily bogus, most occurrences of
these are in format or other printable strings, where they can be replaced by
a `\\t'.
"""
import re
import fileinput
# Module version number; the -V command-line option prints it with "%2.1f".
__version__ = 2.0
# Identification string; presumably an (unexpanded) RCS/CVS keyword
# placeholder -- TODO confirm.
__ident__ = '$Id:$'
#
# WhitespaceException
#
class WhitespaceException(Exception):
    """Root of the exception hierarchy used by the whitespace module."""


class WhitespaceNonTextFileError(WhitespaceException):
    """Raised when a file is not a text file.

    A text file, in the UNIX sense used here, is one in which every line
    is terminated by a newline character.  The exception always carries
    the argument tuple (100, "File not a text file.").
    """

    def __init__(self):
        # Delegate to the base initializer; it stores the arguments in
        # self.args, which is where callers look for the code and message.
        WhitespaceException.__init__(self, 100, "File not a text file.")
#
# The `whitespaceerrors' class
#
class whitespaceerrors:
    """Patterns and bookkeeping for the whitespace error types.

    The class attributes are the compiled regular expressions used to
    recognise each kind of whitespace error.  An instance additionally
    keeps, for every kind of error, a flag saying whether it has been
    seen and a list of the line numbers on which it was recorded.

    One instance of this is used by the `whitespace' class to keep track
    of the types of errors that have been encountered.

    This is an internal class.  Users of the whitespace module should
    only instantiate the `whitespace' class.
    """

    # A line that is empty or consists only of spaces and tabs.
    leadtrailre = re.compile("^[ \t]*$")
    # Eight spaces following the (possibly empty) run of leading tabs;
    # the tabs are captured so that a replacement can keep them.
    indentspacere = re.compile("^(\t*) {8}")
    # A space immediately followed by a tab.
    spacetabre = re.compile(" \t")
    # A tab preceded by an odd number of double quotes, i.e. a tab that
    # sits inside a double-quoted string.  The repeated group skips one
    # "closing quote, text outside, opening quote, text inside" sequence
    # at a time; the text between two literals may be of any length.
    strtabre = re.compile("^[^\"]*\"[^\"]*(\"[^\"]*\"[^\"]*)*\t")
    # A space or tab at the end of the line (before the newline, if any).
    ateolre = re.compile("[ \t]$")

    def __init__(self, noindent):
        """Create an empty error record.

        noindent -- when non-zero, hasindent() never reports a match.
        """
        self.noindent = noindent
        # Per-type flags, set through the set*() methods.
        self.leadingerr = 0
        self.trailingerr = 0
        self.indenterr = 0
        self.spacetaberr = 0
        self.strtaberr = 0
        self.ateolerr = 0
        # Per-type lists of recorded line numbers.
        self.leadarray = []
        self.indentarray = []
        self.spacetabarray = []
        self.strtabarray = []
        self.ateolarray = []
        self.trailarray = []

    #
    # Methods to check for whitespace errors.  Each returns a match
    # object (true) when the line matches and a false value otherwise.
    #
    def hasleadtrail(self, line):
        "Does the given line have a leading/trailing whitespace match?"
        return self.leadtrailre.match(line)

    def hasindent(self, line):
        """Does the given line have an indentation whitespace match?

        Always returns 0 when the instance was created with `noindent'
        set.
        """
        if self.noindent == 0:
            return self.indentspacere.match(line)
        return 0

    def hasspacetab(self, line):
        "Does the given line have space followed by a tab?"
        return self.spacetabre.search(line)

    def hasstrtab(self, line):
        "Does the given line have tab inside a literal string?"
        return self.strtabre.match(line)

    def hasateol(self, line):
        "Does the given line have a space or a tab at the end-of-line?"
        return self.ateolre.search(line)

    #
    # Methods to return an array of lines with whitespace errors.
    #
    def leading(self):
        "List of all lines with leading whitespace"
        return self.leadarray

    def indent(self):
        "List of all lines with indentation whitespace"
        return self.indentarray

    def spacetab(self):
        "List of all lines containing spaces followed by tabs"
        return self.spacetabarray

    def strtab(self):
        "List of all lines containing tabs in literal strings"
        return self.strtabarray

    def ateol(self):
        "List of all lines with spaces/tabs at the end-of-line"
        return self.ateolarray

    def trailing(self):
        "List of all lines with trailing whitespace"
        return self.trailarray

    #
    # Methods to append an offending line to an array.
    #
    def append(self, lt, lineno):
        """Add the given line number to the given list.

        Nothing is added when `lineno' is None, which is how the set*()
        methods signal "flag only, no line number".
        """
        if lineno is not None:
            lt.append(lineno)

    def setleading(self, val=1, line=None):
        """Turn on(/off) leading space flag, and note down the given line
        number, if any."""
        self.leadingerr = val
        self.append(self.leadarray, line)

    def setindent(self, val=1, line=None):
        """Turn on(/off) indentation space flag, and note down the given
        line number, if any."""
        self.indenterr = val
        self.append(self.indentarray, line)

    def setspacetab(self, val=1, line=None):
        """Turn on(/off) space followed by tab flag, and note down the given
        line number, if any."""
        self.spacetaberr = val
        self.append(self.spacetabarray, line)

    def setstrtab(self, val=1, line=None):
        """Turn on(/off) tab in literal string flag, and note down the given
        line number, if any."""
        self.strtaberr = val
        self.append(self.strtabarray, line)

    def setateol(self, val=1, line=None):
        """Turn on(/off) whitespace at end-of-line flag, and note down the
        given line number, if any."""
        self.ateolerr = val
        self.append(self.ateolarray, line)

    def settrailing(self, val=1, line=None):
        """Turn on(/off) trailing space flag, and note down the given line
        number, if any."""
        self.trailingerr = val
        self.append(self.trailarray, line)

    #
    # Methods to know if a type of error has been recorded.  Each returns
    # the flag value last stored by the matching set*() method.
    #
    def getleading(self):
        "Return 1 if leading space error encountered"
        return self.leadingerr

    def getindent(self):
        "Return 1 if indentation space error encountered"
        return self.indenterr

    def getspacetab(self):
        "Return 1 if space followed by tab error encountered"
        return self.spacetaberr

    def getstrtab(self):
        "Return 1 if tabs in literal strings encountered"
        return self.strtaberr

    def getateol(self):
        "Return 1 if space at end-of-line encountered"
        return self.ateolerr

    def gettrailing(self):
        "Return 1 if trailing space error encountered"
        return self.trailingerr
#
# The 'whitespace' class
#
class whitespace:
    """The `whitespace' class

    This class checks the given file for the whitespace errors as defined
    by the whitespaceerrors class.

    Construction is done by passing in a filename (mandatory) to check
    for whitespace errors, and a `noindent' flag that disables the
    indentation check when non-zero.  The file is scanned immediately.

    Exported API:

    error() - returns one of:
        < 0: A failure. Exception can be gathered from exc().
        = 0: No whitespace errors.
        > 0: the number of distinct whitespace error types detected
             in the given file.

    errorlist() - Returns a dictionary of whitespace error types, and the
        lines they were detected in.

    exc() - Returns the exception if error() returned a negative integer.

    cleanup() - Writes a cleaned copy of the file.
    """

    # Class-level defaults; every one is overwritten per instance.
    dirty = 0
    errors = ''
    excep = ''
    fatal = 0
    filename = ''
    noindent = 0

    def __init__(self, filename, noindent):
        "Construct a whitespace object for a given file and scan the file"
        self.noindent = noindent
        self.filename = filename
        self.errors = whitespaceerrors(noindent)
        self.dirty = 0
        self.fatal = 0
        self.check()

    def _fail(self, exc):
        """Remember `exc' as the last failure and return the fatal code.

        The code is the negated first exception argument (the errno of an
        IOError, 100 for a non-text file).  When that argument is not a
        positive integer, -1 is used so that a failure is always < 0.
        """
        self.excep = exc
        code = None
        if exc.args:
            code = exc.args[0]
        if not isinstance(code, int) or code <= 0:
            code = 1
        self.fatal = -code
        return self.fatal

    def error(self):
        """Whitespace errors that have been detected in the file

        < 0: A failure. Exception can be gathered from exc()
        = 0: No whitespace errors
        > 0: the number of distinct whitespace error types detected
             in the given file"""
        if self.fatal < 0:
            return self.fatal
        flags = (
            self.errors.getleading(),
            self.errors.getindent(),
            self.errors.getspacetab(),
            self.errors.getstrtab(),
            self.errors.getateol(),
            self.errors.gettrailing(),
        )
        # Count how many of the six error types have their flag set.
        self.dirty = len([flag for flag in flags if flag])
        return self.dirty

    def errorlist(self):
        """A dictionary of the types of whitespace errors and the list of
        lines they have occurred in.  Empty when the scan failed or found
        nothing."""
        retv = {}
        if self.error() <= 0:
            return retv
        errs = self.errors
        table = (
            ("Leading whitespace", errs.getleading, errs.leading),
            ("Indentation whitespace", errs.getindent, errs.indent),
            ("Space followed by Tab", errs.getspacetab, errs.spacetab),
            ("Tab in string literal", errs.getstrtab, errs.strtab),
            ("End-of-line whitespace", errs.getateol, errs.ateol),
            ("Trailing whitespace", errs.gettrailing, errs.trailing),
        )
        for label, seen, lines in table:
            if seen():
                retv[label] = lines()
        return retv

    def check(self):
        """Check the given file for the different types of whitespace errors
        and return one of:

        < 0: A failure. Exception can be gathered from exc()
        = 0: No whitespace errors
        > 0: the number of distinct whitespace error types detected in the
             given file (also obtainable from error())"""
        lineno = 0
        # A private FileInput is used rather than the module-level
        # fileinput.input(), so no global state is shared or left active
        # when the scan is aborted.
        stream = fileinput.FileInput(self.filename)
        try:
            try:
                for line in stream:
                    lineno = stream.filelineno()
                    # Every line of a text file ends with a newline.
                    if not line.endswith("\n"):
                        raise WhitespaceNonTextFileError()
                    if self.errors.hasleadtrail(line):
                        if stream.isfirstline():
                            self.errors.setleading(line=lineno)
                        else:
                            # Tentative: cancelled again by the next
                            # non-blank line.
                            self.errors.settrailing()
                    else:
                        self.errors.settrailing(0)
                    if self.errors.hasindent(line):
                        self.errors.setindent(line=lineno)
                    if self.errors.hasspacetab(line):
                        self.errors.setspacetab(line=lineno)
                    if self.errors.hasstrtab(line):
                        self.errors.setstrtab(line=lineno)
                    if self.errors.hasateol(line):
                        self.errors.setateol(line=lineno)
            finally:
                stream.close()
        except UnicodeError:
            # Content that cannot be decoded as text is not a text file.
            return self._fail(WhitespaceNonTextFileError())
        except (IOError, WhitespaceNonTextFileError) as e:
            return self._fail(e)
        # The flag still being set means the file ended on a blank line;
        # record the number of the last line read.
        if self.errors.gettrailing():
            self.errors.settrailing(line=lineno)
        return self.error()

    def exc(self):
        "The exception that last occurred"
        return self.excep

    def leadtrailclean(self, lt, which):
        """Return a copy of the list of lines `lt' with the blank lines
        removed from one end.

        which -- "trail" strips blank lines from the end of the list; any
                 other value strips them from the start.

        The list passed in is left unmodified.
        """
        fromend = (which == "trail")
        lines = list(lt)
        if fromend:
            # Work from the end by reversing, then undo it afterwards.
            lines.reverse()
        kept = []
        for idx, line in enumerate(lines):
            if not self.errors.hasleadtrail(line):
                # First non-blank line: keep it and everything after it.
                kept = lines[idx:]
                break
        if fromend:
            kept.reverse()
        return kept

    def regclean(self, rematch, reg, nreg, line):
        """Repeatedly substitute `nreg' for matches of the pattern `reg' in
        `line' for as long as the predicate `rematch' accepts the line,
        and return the result."""
        while rematch(line):
            line = reg.sub(nreg, line)
        return line

    def cleanup(self, file=None):
        """Cleanup the current file, if whitespace unclean. Given a
        filename, write to the file. The same name as the original file will
        cause an inplace replacement.

        If no file name is given, then a .clean file is written.

        We don't attempt to fix the tab-in-literal-string whitespace alone,
        since it is language dependent, and replacing the tab would also
        assume how tab is represented in various languages.

        Returns 0 on success or when there is nothing to clean, and a
        negative code (see error()) on failure."""
        if self.fatal:
            return self.fatal
        if self.error() == 0:
            return 0
        import os, shutil
        if file is None:
            file = self.filename + ".clean"
        # The result is staged in a process-specific temporary file next
        # to the original and then copied over the destination.
        tmpfile = self.filename + "." + str(os.getpid())
        try:
            with open(self.filename, "r") as inf:
                inputlines = inf.readlines()
            olist = []
            for line in inputlines:
                # Fix indentation whitespace
                line = self.regclean(self.errors.hasindent,
                                     self.errors.indentspacere,
                                     r"\1\t",
                                     line)
                # Fix EOL whitespace
                line = self.regclean(self.errors.hasateol,
                                     self.errors.ateolre,
                                     "",
                                     line)
                # Fix Space-followed-by-Tab whitespace
                line = self.regclean(self.errors.hasspacetab,
                                     self.errors.spacetabre,
                                     "\t",
                                     line)
                olist.append(line)
            # Fix Leading space
            olist = self.leadtrailclean(olist, "lead")
            # Fix Trailing space
            olist = self.leadtrailclean(olist, "trail")
            try:
                with open(tmpfile, "w") as outf:
                    outf.writelines(olist)
                shutil.copymode(self.filename, tmpfile)
                shutil.copy(tmpfile, file)
            finally:
                # Never leave the temporary file behind, even on failure.
                if os.path.exists(tmpfile):
                    os.unlink(tmpfile)
            return 0
        except IOError as e:
            return self._fail(e)
import sys
import string
from getopt import getopt
class whitespaceprogram:
    """The program itself.

    Instantiating the class parses the command line in sys.argv, checks
    (and with -c cleans) every file named on it, and terminates the
    process through sys.exit().
    """

    # Program name: the last '/'-separated component of argv[0].
    name = sys.argv[0].split('/')[-1]
    # Exit status: sum of the error-type counts over all scanned files.
    ret = 0
    # Option letters accepted by getopt; none of them takes an argument.
    flags = '?hcvVI'
    verbose = 0
    cleanup = 0
    noindent = 0

    def __init__(self):
        "Run the program; does not return, since run() calls sys.exit()"
        self.run()

    def errlist(self, file, dict):
        """Print one line per error type found in `file'.

        dict -- mapping of error description to the list of line numbers,
                as returned by whitespace.errorlist().  The line numbers
                are printed only in verbose mode.
        """
        for err, lines in dict.items():
            if self.verbose:
                print("%s: %s %s" % (file, err, lines))
            else:
                print("%s: %s" % (file, err))

    def usage(self):
        "Print the usage message and exit with status -1"
        print("%s: No input files" % self.name)
        print("Usage: %s [-V] [ -c | -v | -h | -I | -? ] file ..." % self.name)
        print("-V\t version information")
        print("-c\t cleanup")
        print("-v\t verbose")
        print("-I\t No indentation whitespace check")
        print("-h/-?\t this message")
        sys.exit(-1)

    def run(self):
        """Parse the options, process each file and exit.

        The exit status is the total number of whitespace error types
        found.  Files that could not be scanned are reported, but do not
        add to the exit status.
        """
        # Imported here because the file only imports the getopt function.
        from getopt import GetoptError
        try:
            optlist, args = getopt(sys.argv[1:], self.flags)
        except GetoptError:
            # Only option-parsing errors lead to the usage message; other
            # exceptions (e.g. KeyboardInterrupt) propagate.
            self.usage()
        for (flag, arg) in optlist:
            if flag == '-V':
                print("%s v%2.1f" % (self.name, __version__))
                print("Copyright (C) 1999-2000 Rajesh Vaidheeswarran")
                print("All rights reserved.")
                print("")
            elif flag == '-?' or flag == '-h':
                self.usage()
            elif flag == '-v':
                self.verbose = 1
            elif flag == '-c':
                self.cleanup = 1
            elif flag == '-I':
                self.noindent = 1
        if len(args) == 0:
            self.usage()
        for file in args:
            thisfile = whitespace(file, self.noindent)
            cv = thisfile.error()
            if cv < 0:
                # Report the message part of the stored exception,
                # falling back to the exception itself if it has none.
                failure = thisfile.exc()
                failargs = getattr(failure, 'args', ())
                if len(failargs) > 1:
                    print("%s: %s" % (file, failargs[1]))
                else:
                    print("%s: %s" % (file, failure))
            else:
                self.ret = self.ret + cv
            if cv:
                errs = thisfile.errorlist()
                if self.cleanup:
                    if self.verbose:
                        self.errlist(file, errs)
                    # Clean in place: the output name is the input name.
                    retv = thisfile.cleanup(file)
                    if retv != 0:
                        print("%s returned %d" % (file, retv))
                else:
                    self.errlist(file, errs)
        sys.exit(self.ret)
# Script entry point: constructing the program object runs it.
if __name__ == '__main__':
    whitespaceprogram()