| Bri Hatch | Personal | Work |
|---|---|---|
| | Onsight, Inc bri@ifokr.org | ExtraHop Networks bri@extrahop.com |
if (a = b - 1)
GLOBAL_OR_CLASS_CONSTANT
some_local_variable
SomeCoolClass
a_package_name
a_module_name
some_function_name()
some_method_name()
_internal_class_or_module_name
__private_class_or_module_name
__python_builtins__
General rules
foo = True # A True value foo = False # A False value foo = None # akin to NULL, N/A, etc. print 1 == 1 True
What's False? None, False, 0, empty string, empty dictionary, empty list, empty tuple.
What's True? Everything else.
foo = 100 # foo is now 100 foo += 10 # foo is now 110 print type(999) int print type(0.5) float # Even boring numbers are objects dir(foo) foo.bit_length foo.conjugate foo.denominator foo.imag foo.numerator foo.real foo++ SyntaxError # No auto increment for you!
foo = 'Hello, world' foo = "Hello, world" # Both types are equivalent foo = 'Hello, world\n' foo = "Hello, world\n" # Both of these are equivalent too. foo = "Who's on first?" foo = 'Who\'s on first?' # Backslash escaping print r'c:\\My Drive\subdir\wipe.exe' # Raw - no escaping c:\\My Drive\subdir\wipe.exe
foo = ('This string ' 'is automagically ' 'concatenated.') print foo This string is automagically concatenated. print type(foo) str foo = unicode('I prefer ascii') print type(foo) unicode print """A very long string that has multiple lines in it.""" A very long string that has multiple lines in it.
print foo Hello, world print foo[1] e print foo[7:9] wo print foo[7:] world
dir(foo) foo.capitalize foo.isalnum foo.lstrip foo.splitlines foo.center foo.isalpha foo.partition foo.startswith foo.count foo.isdigit foo.replace foo.strip foo.decode foo.islower foo.rfind foo.swapcase foo.encode foo.isspace foo.rindex foo.title foo.endswith foo.istitle foo.rjust foo.translate foo.expandtabs foo.isupper foo.rpartition foo.upper foo.find foo.join foo.rsplit foo.zfill foo.format foo.ljust foo.rstrip foo.index foo.lower foo.split print 'Hello, World!'.upper() HELLO, WORLD!
foo = { 'domain': 53, 'http': 80, 'https': 443, 'imap': 143, 'smtp': 25, } print foo['smtp'] 25Datatypes - Dictionaries (cont)
foo = { 'domain': 53, 'http': 80, 'https': 443, 'imap': 143, 'smtp': 25, } print foo['ssh'] KeyError print foo.get('ssh') None print foo.get('ssh', 'no clue') no clueLists
A collection of ordered elements, aka arrays.foo = ["Who's on first", "What's on second", "I don't know - third base"] print foo[0] Who's on first print foo[0:2] # Up to but not including ["Who's on first", "What's on second"] print foo[1:] ["What's on second", "I don't know - third base"]Lists (cont)
print foo.pop(1) What's on second print foo ["Who's on first", "I don't know - third base"] dir(foo) foo.append foo.count foo.extend foo.index foo.insert foo.pop foo.remove foo.reverse foo.sort foo.append("I don't give a darn") print foo ["Who's on first", "I don't know - third base", "I don't give a darn"]Lists (cont)
foo.extend(['Yesterday', 'Tomorrow']) print foo ["Who's on first", "I don't know - third base", "I don't give a darn", 'Yesterday', 'Tomorrow']
What does this do? foo.extend("He's our shortstop") print foo
Lists (cont)
A string is an iterable - it iterates through each character, thus: foo.extend("He's our shortstop") print foo ["Who's on first", "I don't know - third base", "I don't give a darn", 'Yesterday', 'Tomorrow', 'H', 'e', "'", 's', ' ', 'o', 'u', 'r', ' ', 's', 'h', 'o', 'r', 't', 's', 't', 'o', 'p']
Tuples
Immutable ordered structures. userinfo = ('jenny', 'Jenny', 'Tutone') print userinfo[1] Jenny dir(userinfo) userinfo.count userinfo.index print userinfo + ('800.867.5309',) ('jenny', 'Jenny', 'Tutone', '800.867.5309')
Functions
Functions and methods start with def. def diehorribly(): """Exits immediately.""" sys.exit(2) def frob_the_widget(widget, frobfactor): """Frobs a widget with frobfactor goodness. frobfactor must be an even prime greater than 5. """ if frobfactor < 5: raise Error('frobfactor "%s" is too small.' % frobfactor) ... print diehorribly.__doc__ Exits immediately.
Docstrings
Various formatters exist for your source code. Even minimal documentation is useful via pydoc or ipython.$ ipython import runcmd help(runcmd.runcmd) runcmd(command, command_input=None, cwd=None) Run a command, potentially w/ stdin; capture stdout/err. Arguments: command: command to run, as a list. command_input: string containing input to program. cwd: directory into which we should cd before running the command. Returns to our current directory when done. Returns a tuple of return code, stdout, stderr.Function Arguments
Python supports many styles of passing arguments.# A function that has one mandatory argument, and one optional one # with a default. Named parameters can occur in any order after # the non-keyword arguments. def argfunc(arg1, arg2='foo', arg3='bar'): """Print the args that are sent.""" print 'You sent %s/%s/%s' % (arg1, arg2, arg3) argfunc('Hello') You sent Hello/foo/bar argfunc('Hello', 'World') You sent Hello/World/bar argfunc('Hello', arg3='World') You sent Hello/foo/World argfunc('Hello', arg3='World', arg2='Big') You sent Hello/Big/WorldFunction Arguments (Cont)
An asterisk lets you slurp additional arguments into a tuple. def argfunc(arg1, *mylist): """Print the args that are sent.""" print 'You sent %s and "%s"' % (arg1, ','.join(mylist)) argfunc('Hello') You sent Hello and "" argfunc('Hello', 'World') You sent Hello and "World" argfunc('Hello', 'Big', 'Bad', 'World') You sent Hello and "Big,Bad,World"
Function Arguments (cont)
A double asterisk puts keyword arguments in a dictionary.def newfunc(arg1, foo=None, *args, **kwargs): """Show some things.""" print 'arg1 == %s\nfoo == %s\nargs == %s\nkwargs == %s' % ( arg1, foo, args, kwargs) newfunc('wbagg') arg1 == wbagg foo == None args == () kwargs == {} newfunc('wbagg', 12, 'hello', 'world', method='ugly', type='fast') arg1 == wbagg foo == 12 args == ('hello', 'world') kwargs == {'type': 'fast', 'method': 'ugly'}Loops and Logic
You knew they were coming.if foo != 100: do_something while foo < 100: do_something, probably changing foo for thing in iterable: do_something with things from iterableLoops and Logic (cont)
Loops also offer
- continue - return to the top (and test) of the loop.
- break - leave the loop
- else - runs if the loop completes without having encountered a break
# Print numbers from 0 to 99, most of the time foo = 0 while foo < 100: print foo foo += 1 if random.randint(0, 1000) == 42: break else: print "Today's not my lucky day."
Loops and Logic (cont)
For loops over dictionaries iterate only through keys by default: colours = {'red': '255 0 0', 'thistle': '216 191 216', 'violet': '238 130 238'} for colour in colours: print colour thistle violet red
Iteritems can be useful too. for (k,v) in colours.iteritems(): print k, v thistle 216 191 216 violet 238 130 238 red 255 0 0
Generators
Generators create iterables from a function via yield. Typically they're used to avoid pre-generating large lists, or slurping in data incrementally rather than all at once.def fives(count): num = 0 while num < count: yield 5 * num num += 1 for five in fives(10): print five, 0 5 10 15 20 25 30 35 40 45 # Of course we could have just done this for five in range(0, 46, 5): print five,List Comprehensions
List Comprehensions let you generate lists without loops. colours = {'red': '255 0 0', 'thistle': '216 191 216', 'violet': '238 130 238'} colournames = ['red', 'violet', 'thistle'] print [colours[x] for x in colournames] ['255 0 0', '238 130 238', '216 191 216'] nums = [0, 1, 2, 3, 4, 5, 6, 7, 8] print [x for x in nums if x % 2 == 0] [0, 2, 4, 6, 8]
Lambdas
A lambda
- returns a (nameless) function.
- very useful in unit tests / mocks
- is beyond the scope of this talk
Exceptions
Python supports exception handling.
- Allows a function to report an error condition
- Can be caught at any level of the stack
- Can be re-raised
- Great for avoiding propagating error conditions up the stack by many layers of return code logic.
Exceptions (cont)
Many builtin types, including
- IOError
- IndexError
- KeyError
- SyntaxError
- TypeError
Exceptions (cont)
Each module can (should!) create its own Error class:
#!/usr/bin/env python ... class Error(Exception): """The frobnicate exception class.""" pass class FrobsExceeded(Error): """Frobnication limit exceeded.""" pass class Frobber(object): .... def frobit(self): ... if self.frobs_remaining <= 0: raise FrobsExceeded('No more frobs allowed. ' '(Performed %d)' % self.frobs_completed)
Exceptions (cont)
The calling code can catch exceptions with whatever specificity you like with try/except: #!/usr/bin/env python ... import frobnicate ... frobber = frobnicate.Frobber(target='/dev/thx1138') try: while True: frobber.frobit() except frobnicate.FrobsExceeded: print 'Finished frobbing.' except frobnicate.Error as err: sys.exit('Unexpected frobnicate error "%s"' % err) except Exception: print 'blindly ignoring error, a horrible idea.'
Classes
Python has built-in OO capabilities. Each class can have the following:
- A constructor method, __init__
- Class variables (typically constants)
- Object methods, which take as the first argument a reference to the object itself, typically called self.
- Class methods, which are not related to specific objects of that class
- Convention-based non-public variables and methods, starting with an underscore
- Private (name-mangled) variables and methods, starting with double underscore
Classes (cont)
$ cat fakecoin.py import random class FakeCoin(object): def __init__(self, pct=1): """FakeCoin constructor.""" self.pct = pct self.coins = 0 def mine(self): """Try to mine another coin.""" if random.randint(0, 100) < self.pct: self.coins += 1 def coins(self): """Return number of coins generated thus far.""" return self.coins if __name__ == '__main__': print 'This is a library only, dummy.'Classes (cont)
$ cat make_fakecoins.py #!/usr/bin/env python """Generate a bunch of FakeCoins.""" import sys import fakecoin def main(argv): """Main.""" try: percentage = int(argv[1]) except IndexError: percentage = 20 bucket = fakecoin.FakeCoin(percentage) for x in range(1000): bucket.mine() print bucket.coins if __name__ == '__main__': main(sys.argv)Modules
A module is something you can import.
- Found in a system-installed location, or found relative to the program itself.
- Must be python code that, when "run", does not cause side effects - typically just a bunch of classes and/or functions.
- Can forcibly put stuff into your namespace, if desired.
Modules
Various ways of importing modules: #!/usr/bin/env python import random from frobnicate import clue_by_four import dev.mungetasticize as mungetasticize if __name__ == '__main__': random.randint(0, 20) clue_by_four(target='sandy') mungetasticize.with_prejudice('/dev/fd0')
My Python Skeleton
Every one of my scripts starts out like this:#!/usr/bin/env python """foo: Do something interesting.""" import optparse import textwrap def main(): """Main.""" parser = optparse.OptionParser() parser.usage = textwrap.dedent("""\ %prog [options] [arguments] Do something interesting. """)My Python Skeleton (cont)
parser.add_option('--foo', dest='foo', action='store_true', help='Do something foofull') opts, args = parser.parse_args() if args: parser.error('Extraneous command line options found.') if opts.foo: do_something_fooish if __name__ == '__main__': main()Pylint
Pylint shows syntax and style violations. $ pylint mungetasticize.py ************* Module mungetasticize C: 1,0: Missing docstring W: 59,8:get_csv: No exception type(s) specified W:118,16:main: No exception type(s) specified W:101,10:main: Unused variable 'args' ... +-----------+-------+---------+-----------+ |type |number |previous |difference | +===========+=======+=========+===========+ |convention |6 |6 |= | +-----------+-------+---------+-----------+ ...
It includes a lot of 'reports' that I find annoying, so I use: $ pylint --rcfile .pylintrc --include-ids=y --reports n filename
PEP8
pep8 checks your code for PEP8 violations:$ pep8 --repeat mungetasticize.py mungetasticize.py:22:1: E302 expected 2 blank lines, found 1 mungetasticize.py:48:80: E501 line too long (80 characters) mungetasticize.py:67:1: W293 blank line contains whitespacePychecker
Yet another tool, pychecker, looks for common mistakes
$ pychecker mungetasticize.py Processing module mungetasticize (./mungetasticize.py)... Warnings... mungetasticize.py:63: Statement appears to have no effectDecorators
Decorators allow you to modify how a function, method, or class behaves, without changing the source code or subclassing it. Much like lambdas, we're not covering them here; our time is finite.
Unit tests
Unit tests (often part of test-driven development) are awesome.
- Removes need to manually run your code through a barrage of tests every time you add features or refactor.
- Can 'mock out' dependencies, such as databases, files, clients.
- Best when created during development, rather than after the fact.
Unit tests (cont)
A unit test suite is a file with a class that inherits from the unittest.TestCase class. Each method name that starts with "test" is run one by one. Unit tests do not need to return anything, but should raise Exceptions if the tests fail. There are many built-in methods for testing and raising exceptions, such as
- assertTrue( expression)
- assertFalse( expression)
- assertEqual( thing1, thing2)
- assertNotEqual( thing1, thing2)
- assertRaises( Exception_Class, function [,arguments])
Unit Tests (cont)
Example code with bug:#!/usr/bin/env python """gorilla2mitro: Convert a Gorilla csv to Mitro csv.""" import csv import optparse import sys import textwrap FIELD_MAPPING = { 'notes': 'extra', 'password': 'password', 'title': 'name', 'url': 'url', 'user': 'username', }Unit tests (cont)
def main(): """Main.""" parser = optparse.OptionParser() parser.usage = textwrap.dedent("""\ %prog [options] input.csv output.csv Convert a Gorilla csv to Mitro csv """) dummy_opts, args = parser.parse_args() if len(args) != 2: parser.error('input and output file required.') input_file = args[0] output_file = args[1] convert(open(input_file), open(output_file, 'w'))Unit tests (cont)
def convert(input_fh, output_fh): gorilla_csv = csv.DictReader(input_fh) csv_writer = csv.DictWriter(output_fh, fieldnames=FIELD_MAPPING.values()) csv_writer.writeheader() for row in gorilla_csv: newrow = {} for oldkey in FIELD_MAPPING: newrow[FIELD_MAPPING[oldkey]] = row[oldkey] csv_writer.writerow(newrow) if __name__ == '__main__': main()Unit tests (cont)
Here's our first stab at a unit test:#!/usr/bin/env python import StringIO import gorilla2mitro import unittestUnit Tests (cont)
GOOD_CSV = """\ url,title,user,password,notes www.extrahop.com,ExtraHop Website,wendell,IShipSeriarity, www.itblame.com,IT Blame Game,foo,CosmicR&ys, www.networktimeout.com,NTI,mollcroft,CosmicR&ys,Packet Capture Analysis Tool """ GOOD_RESULT_CSV = """\ url,extra,password,username,name www.extrahop.com,,IShipSeriarity,wendell,ExtraHop Website www.itblame.com,,CosmicR&ys,foo,IT Blame Game www.networktimeout.com,Packet Capture Analysis Tool,CosmicR&ys,mollcroft,NTI """.replace('\n', '\r\n')Unit Tests (cont)
class MyTests(unittest.TestCase): """MyTests class.""" def test_normal(self): """Verify we can parse a functional file.""" input_fh = StringIO.StringIO(GOOD_CSV) output_fh = StringIO.StringIO() gorilla2mitro.convert(input_fh, output_fh) output_fh.seek(0) output = output_fh.read() self.assertEqual(output, GOOD_RESULT_CSV) if __name__ == '__main__': unittest.main()Unit tests (cont)
There are bugs both in our code and in our unit test code. Finding and expanding is an exercise left to the reader. ;-)
Python is cool!
Any questions?
Thanks!
Presentation: http://www.ifokr.org/bri/presentations/seagl-2014-python/
Personal: Bri Hatch
Onsight, Inc
bri@ifokr.org
Work: Bri Hatch
ExtraHop Networks
bri@extrahop.com
Copyright 2014, Bri Hatch, Creative Commons BY-NC-SA License