#! /usr/bin/env python
r"""
Usage: cat full_41 | ./tweak_tex.py > full_41.tex

Reads the TeX generated by the full-tex.xsl transformation file from
standard input, fixes a few things, and dumps the resulting TeX file to
standard output.  Generally this is all done in one step:

  $ xalan -param get_name 41 -xsl full-tex.xsl \
      -in ../../beer_recipes.xml | ./tweak_tex.py > full_41.tex

Then:

  $ pdftex full_41.tex

This program performs the following fixes:

  * Removes extra whitespace at the start of lines (sed 's/^ *//')
  * Removes extra lines
  * Escapes the percent symbol when it follows a number
    (sed 's/\([0-9]\)%/\1\\%/g')
  * Inserts the degree symbol when there's a number followed by F
  * smallcaps common abbreviations (WLP, AA)
  * Inserts fractions for common decimals (0.25, 0.5, 0.75)
  * Reformats the Recipe table so the integer and fractional terms line up

The per-line logic lives in tweak_line() and the blank-line squeezing in
tweak_lines(), so the module can be imported without touching stdin.
"""

import re
import sys

# NOTE: every pattern and replacement template below is a raw string with
# explicit "\\" for a literal backslash.  Relying on unknown escapes such as
# "\s" or "\h" being passed through untouched only worked on Python 2; the re
# module rejects them on Python 3.7+.

# Kept for backward compatibility: tweak_line() uses str.strip(), which
# already removes the leading spaces this pattern matches.
left_whitespace_re = re.compile(r'^ *')
percent_symbol_re = re.compile(r'([0-9])%')
recipe_zero_decimal_re = re.compile(r'\.0+&')
recipe_quarter_re = re.compile(
    r'([0-9]+)\.25&'
    r'(pound|pounds|ounce|ounces|gram|grams|teaspoon|teaspoons)')
recipe_half_re = re.compile(
    r'([0-9]+)\.50?&'
    r'(pound|pounds|ounce|ounces|gram|grams|teaspoon|teaspoons)')
recipe_threequarter_re = re.compile(
    r'([0-9]+)\.75&'
    r'(pound|pounds|ounce|ounces|gram|grams|teaspoon|teaspoons)')
# A recipe table row: at least three column separators, terminated by a
# literal "\cr".
recipe_line_re = re.compile(r'^.*&.*&.*&.*\\cr$')
recipe_format_re = re.compile(r'#~&')
recipe_multispan_re = re.compile(r'multispan4')
fraction_re = re.compile(r'\\frac')
generic_quarter_re = re.compile(
    r'\.25 (quart|quarts|teaspoon|teaspoons|ml|gallon|gallons)')
generic_half_re = re.compile(
    r'\.50? (quart|quarts|teaspoon|teaspoons|ml|gallon|gallons)')
generic_threequarter_re = re.compile(
    r'\.75 (quart |quarts |teaspoon |teaspoons |ml |gallon |gallons )')
generic_twothirds_re = re.compile(
    r' 2/3 (quart |quarts |teaspoon |teaspoons |ml |gallon |gallons )')
degree_re = re.compile(r'([0-9]) ?(F)')
wlp_re = re.compile(r'WLP')
aa_re = re.compile(r'AA')
xl_re = re.compile(r'XL')
hbd_re = re.compile(r'HBD')
xpr_re = re.compile(r'XPR')
byo_re = re.compile(r'BYO')
ipa_re = re.compile(r'IPA')
vip_re = re.compile(r'VIP')
crystal_re = re.compile(r'([0-9]+0)L')
dash_fix_re = re.compile(r'([0-9]) ?- ?([0-9])')
blank_line_re = re.compile(r'^$')
kolsch_re = re.compile(r'Kolsch')
arrow_re = re.compile(r'->')
normal_half_re = re.compile(r' 1/2')
normal_quarter_re = re.compile(r' 1/4')
month_re = re.compile(
    r'[0-9]-(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)-[12]')
doublequote_re = re.compile(r'"([^"]+)"')

# Substitutions applied to every line, in this order, after the recipe table
# has been reformatted.
_GENERAL_SUBSTITUTIONS = (
    # Widen the recipe table by one column (integer part | fraction part).
    (recipe_format_re, r'#&#\\hfil~&'),
    (recipe_multispan_re, r'multispan5'),
    # Decimal quantities in running text become fractions.
    (generic_quarter_re, r'\\frac1/4 \g<1>'),
    (generic_half_re, r'\\frac1/2 \g<1>'),
    (generic_threequarter_re, r'\\frac3/4 \g<1>'),
    (generic_twothirds_re, r'\\frac2/3 \g<1>'),
    # Degree symbol plus small-caps unit.
    (degree_re, r'\g<1>\\deg{}{\\sc \\lowercase{\g<2>}}'),
    # Small-caps abbreviations.
    (aa_re, r'{\\sc aa}'),
    (xl_re, r'{\\sc xl}'),
    (wlp_re, r'{\\sc wlp}'),
    (hbd_re, r'{\\sc hbd}'),
    # NOTE(review): "\scttle" is reproduced from the original script; it may
    # be a project macro or a typo for "\sc" -- confirm against the TeX
    # preamble.
    (xpr_re, r'{\\scttle xpr}'),
    (byo_re, r'{\\sc byo}'),
    (ipa_re, r'{\\sc ipa}'),
    (vip_re, r'{\\sc vip}'),
    (crystal_re, r'\g<1>{\\sc l}'),
    (arrow_re, r'---\\kern-1.2ex\\lower0.04em\\hbox{>}'),
    (normal_half_re, r' \\frac1/2'),
    (normal_quarter_re, r' \\frac1/4'),
)


def _fix_recipe_fraction(line, regex, fraction):
    """Replace a leading decimal recipe amount with a TeX fraction.

    regex must capture the integer part as group 1 and the unit as group 2.
    The substitution only happens when the line *starts* with the amount.
    An integer part of zero is dropped, so "0.5&ounce" becomes
    "\\frac1/2&ounce".  The zero test is made per match, so a second amount
    on the same line keeps its own integer part.
    """
    if not regex.match(line):
        return line

    def replace(match):
        whole = match.group(1) if int(match.group(1)) else ''
        return whole + '\\frac' + fraction + '&' + match.group(2)

    return regex.sub(replace, line)


def tweak_line(line):
    """Return one line of generated TeX with all textual fixes applied.

    Surrounding whitespace (including the trailing newline) is stripped.
    Collapsing of repeated blank lines is handled by tweak_lines(), not here.
    """
    line = line.strip()

    # Escape a percent sign that follows a digit.
    line = percent_symbol_re.sub(r'\g<1>\\%', line)

    # Replace a dash separating numbers with an en dash.
    line = dash_fix_re.sub(r'\g<1>--\g<2>', line)

    # Replace decimal amounts at the start of a recipe row with fractions and
    # drop a pointless ".0".
    line = _fix_recipe_fraction(line, recipe_half_re, '1/2')
    line = _fix_recipe_fraction(line, recipe_quarter_re, '1/4')
    line = _fix_recipe_fraction(line, recipe_threequarter_re, '3/4')
    line = recipe_zero_decimal_re.sub('&', line)

    # Convert "quoted text" to TeX quotes.  This must run BEFORE the Kolsch
    # fix below: that fix inserts a literal '"' (the umlaut accent), which
    # would otherwise be mistaken for a quotation mark.
    line = doublequote_re.sub(r"``\g<1>''", line)

    # Fix some word forms.
    line = kolsch_re.sub(r'K\\"olsch', line)

    # Reformat the recipe: insert an extra column separator so the integer
    # and fractional parts of the amount sit in separate, aligned columns.
    if recipe_line_re.match(line):
        columns = line.split('&')
        if fraction_re.search(columns[0]):
            columns[0] = fraction_re.sub(r'&\\frac', columns[0])
        else:
            columns[1] = '&' + columns[1]
        line = '&'.join(columns)

    for regex, template in _GENERAL_SUBSTITUTIONS:
        line = regex.sub(template, line)

    # Lines carrying a date such as 5-Jan-2004 are lower-cased as a whole.
    if month_re.search(line):
        line = line.lower()

    return line


def tweak_lines(lines):
    """Yield the fixed form of each input line.

    lines is any iterable of strings (for example an open file).  Runs of
    consecutive blank lines are collapsed to a single empty line.  Yielded
    lines carry no trailing newline.
    """
    previous_blank = False
    for raw_line in lines:
        stripped = raw_line.strip()
        if blank_line_re.match(stripped):
            if not previous_blank:
                yield ''
            previous_blank = True
            continue
        previous_blank = False
        yield tweak_line(stripped)


def main():
    """Filter standard input to standard output."""
    write = sys.stdout.write
    for fixed in tweak_lines(sys.stdin):
        write(fixed + '\n')


if __name__ == '__main__':
    main()