Python for Everybody

Chapter 11

Exercise 11.1

"""
Exercise 11.1: Write a simple program to simulate the operation of the grep
command on Unix. Ask the user to enter a regular expression and count the
number of lines that matched the regular expression:

$ python grep.py
Enter a regular expression: ^Author
mbox.txt had 1798 lines that matched ^Author

$ python grep.py
Enter a regular expression: ^X-
mbox.txt had 14368 lines that matched ^X-

$ python grep.py
Enter a regular expression: java$
mbox.txt had 4218 lines that matched java$

Python for Everybody: Exploring Data Using Python 3
by Charles R. Severance

Solution by Jamison Lahman, June 4, 2017
"""
import re

def count_matching_lines(pattern, lines):
    """Count the lines that contain a match for a regular expression.

    Trailing whitespace (including the newline) is stripped from each line
    before it is tested, so a pattern such as ``java$`` is anchored at the
    last non-blank character of the line.

    Args:
        pattern: Regular expression source text.
        lines: Iterable of strings, e.g. an open text file.

    Returns:
        The number of lines in which ``pattern`` matches at least once.

    Raises:
        re.error: If ``pattern`` is not a valid regular expression.
    """
    # Compile once, outside the loop; a bad pattern is reported before any
    # line is read.
    matcher = re.compile(pattern)
    # search() stops at the first hit, which is all a yes/no test needs;
    # findall() would build a list of every match only to discard it.
    return sum(1 for line in lines if matcher.search(line.rstrip()))


# Prompt only when run as a script, so the counting helper can be imported
# without triggering input() or file access.
if __name__ == '__main__':
    # input() already returns a str, so no conversion is required.
    reg_exp = input('Enter a regular expression: ')
    fname = 'mbox.txt'

    # The with-block closes the file even if matching raises.
    with open(fname) as fhand:
        count = count_matching_lines(reg_exp, fhand)

    print(f'{fname} had {count} lines that matched {reg_exp}')
    

Exercise 11.2

"""
Exercise 11.2: Write a program to look for lines of the form

'New Revision: 39771'

and extract the number from each of the lines using a regular expression and
the findall() method. Compute the average of the numbers and print out the
average.

Enter file:mbox.txt
38549.7949721

Enter file:mbox-short.txt
39756.9259259

Python for Everybody: Exploring Data Using Python 3
by Charles R. Severance

Solution by Jamison Lahman, June 4, 2017
"""
import re


def average_revision(lines):
    """Average the numbers found on ``New Revision: <number>`` lines.

    Only lines that begin with ``New Revision: `` contribute; trailing
    whitespace is stripped from each line before it is examined.

    Args:
        lines: Iterable of strings, e.g. an open text file.

    Returns:
        The arithmetic mean of the extracted numbers as a float, or ``None``
        when no line matched (so callers never divide by zero).

    Raises:
        ValueError: If the captured text is not a valid float (the pattern
            accepts any run of digits and dots, e.g. ``1.2.3``).
    """
    pattern = re.compile('^New Revision: ([0-9.]+)')
    revisions = []
    for line in lines:
        # findall() returns the captured group of every match; an empty list
        # simply contributes nothing, so no separate emptiness test is needed.
        found = pattern.findall(line.rstrip())
        # extend() appends in place rather than rebuilding the list each time.
        revisions.extend(float(val) for val in found)

    if not revisions:
        return None
    return sum(revisions) / len(revisions)


# Prompt only when run as a script, so the averaging helper can be imported
# without triggering input() or file access.
if __name__ == '__main__':
    fname = input('Enter file: ')
    try:
        fhand = open(fname)
    except FileNotFoundError:
        print('File cannot be opened: ', fname)
        exit()

    # The with-block closes the file even if parsing raises.
    with fhand:
        rev_ave = average_revision(fhand)

    if rev_ave is None:
        # Report the empty case instead of failing with ZeroDivisionError.
        print('No revision lines found in', fname)
    else:
        print(rev_ave)