#! /usr/bin/env python
"""Convert saved Writing.Com story pages into Twee-style passages.

Each HTML file named on the command line is parsed and written to stdout as
one passage: a ``:: passage_<file>`` header, a ``!<title>`` heading, the text
found inside ``<div class="KonaBody">``, and one ``[[label|passage_<id>]]``
link for every ``<p><a>`` anchor that is not a navigation link.

The code is written to run unchanged on Python 2 and Python 3; output is
produced with single-argument ``print(line)`` calls, which behave the same
under both.
"""

import os
import re
import sys
import time

# Leading "<anything without a colon>: " prefix of the page <title>.
_TITLE_PREFIX_RE = re.compile(r'^[^:]*:\s+')
# Trailing site name appended to every page <title>.
_TITLE_SUFFIX_RE = re.compile(r' - Writing\.Com$')
# Everything up to and including the last "/" of a link target.
_URI_DIRNAME_RE = re.compile(r'.*/')

# Anchors with these exact labels are site navigation, not story choices.
_SKIPPED_LABELS = frozenset(['Recent Additions', 'Story Outline'])


def clean_title(raw_title):
    """Return *raw_title* without its "<prefix>: " lead and site-name suffix.

    A title that has neither part is returned unchanged.
    """
    without_prefix = _TITLE_PREFIX_RE.sub('', raw_title)
    return _TITLE_SUFFIX_RE.sub('', without_prefix)


def format_choice(label, uri):
    """Return the ``[[label|passage_<id>]]`` link for one anchor, or None.

    ``<id>`` is the part of *uri* after its last ``/``.  ``None`` is returned
    for ``javascript:`` links and for the navigation labels listed in
    ``_SKIPPED_LABELS``, which are not story choices.
    """
    if uri.startswith('javascript:'):
        return None
    if label in _SKIPPED_LABELS:
        return None
    target = _URI_DIRNAME_RE.sub('', uri)
    return '[[%s|passage_%s]]' % (label, target)


def render_passage(page, title, body, choices):
    """Build the output lines for one passage.

    Args:
        page: name used in the ``:: passage_<page>`` header.
        title: already-cleaned page title.
        body: iterable of text fragments, emitted one per line.
        choices: iterable of ``(label, uri)`` pairs; each is passed through
            ``format_choice`` and dropped when that returns ``None``.

    Returns:
        A list of lines (without trailing newlines); ``''`` marks a blank
        line.
    """
    lines = [':: passage_%s' % page, '!%s' % title, '']
    lines.extend(body)
    lines.append('')
    # NOTE: "folowing" (sic) is kept so the generated text stays identical
    # to what this script has always emitted.
    lines.append('You have the folowing choices:')
    lines.append('')
    for label, uri in choices:
        link = format_choice(label, uri)
        if link is not None:
            lines.append(link)
    lines.append('')
    return lines


def convert_page(page):
    """Parse the HTML file *page* and return its passage as a list of lines.

    A missing ``<title>`` yields an empty title, anchors without an ``href``
    are skipped, and an anchor whose label sits inside a child element falls
    back to the anchor's full text content instead of raising ``IndexError``.
    """
    # Imported here so the pure formatting helpers above stay importable
    # on machines that do not have lxml installed.
    from lxml import html

    # Read raw bytes and leave decoding to lxml; the handle is closed as
    # soon as the content has been read.
    with open(page, 'rb') as handle:
        tree = html.fromstring(handle.read())

    titles = tree.xpath('//title/text()')
    title = clean_title(titles[0]) if titles else ''
    body = tree.xpath('//div[@class="KonaBody"]//text()')

    choices = []
    for anchor in tree.xpath('//p/a'):
        uri = anchor.get('href')
        if uri is None:
            continue
        direct_text = anchor.xpath('text()')
        if direct_text:
            label = direct_text[0]
        else:
            label = anchor.text_content().strip()
        if not label:
            continue
        choices.append((label, uri))

    return render_passage(page, title, body, choices)


def _force_utf8_default_encoding():
    """On Python 2 only, make UTF-8 the interpreter's default encoding.

    This is the original script's ``reload(sys)`` /
    ``sys.setdefaultencoding('utf8')`` step, now skipped on Python 3 where
    neither name exists.
    """
    if sys.version_info[0] >= 3:
        return
    reload(sys)  # noqa: F821 -- builtin on Python 2 only
    sys.setdefaultencoding('utf8')


def main(argv=None):
    """Convert every page in *argv* (default: ``sys.argv[1:]``) to stdout."""
    if argv is None:
        argv = sys.argv[1:]
    _force_utf8_default_encoding()
    for page in argv:
        for line in convert_page(page):
            # One argument only: identical output on Python 2 and 3.
            print(line)


if __name__ == '__main__':
    main()