#! /usr/bin/env python
"""Print the results of an XPath query run against one or more HTML files.

Usage: SCRIPT XPATH [FILE ...]

When the last ``/``-separated step of XPATH is ``a``, every match is printed
as ``href | link text``; any other result is printed as-is.

The code deliberately sticks to syntax that is valid on both Python 2 and
Python 3 (single-argument ``print(...)``, ``%`` formatting).
"""

import sys


def _force_utf8_default_encoding():
    """Make UTF-8 the implicit str/unicode codec on Python 2; no-op on 3.

    ``reload`` is only a builtin on Python 2, so the ``NameError`` raised on
    Python 3 is used to skip the hack entirely.
    """
    try:
        reload(sys)  # noqa: F821 - re-exposes sys.setdefaultencoding
    except NameError:
        return
    sys.setdefaultencoding('utf8')


def my_print(item, search):
    """Print one XPath result.

    Args:
        item: A single result of evaluating ``search``: an element, or a
            scalar such as a string or number.
        search: The XPath expression that produced ``item``. If its last
            ``/``-separated step is exactly ``a``, ``item`` is formatted as
            a link; otherwise ``item`` is printed unchanged.

    A link is printed as ``href | text``. A missing ``href`` attribute or a
    missing leading text node is printed as an empty field instead of
    raising ``IndexError`` / ``AttributeError``.
    """
    if search.split('/')[-1] != 'a':
        print(item)
        return

    hrefs = item.xpath('@href')
    href = hrefs[0] if hrefs else ''
    # item.text is None when the element has no text before its first child.
    text = (item.text or '').strip()
    print('%s | %s' % (href, text))


def main():
    """Run the query in ``sys.argv[1]`` on every file in ``sys.argv[2:]``."""
    if len(sys.argv) < 2:
        sys.exit('usage: %s XPATH [FILE ...]' % sys.argv[0])

    _force_utf8_default_encoding()

    # Imported here so my_print stays importable without lxml installed.
    from lxml import html

    search = sys.argv[1]
    for page in sys.argv[2:]:
        # Read raw bytes so the parser does its own encoding detection, and
        # close the file as soon as its content has been read.
        with open(page, 'rb') as handle:
            document = html.fromstring(handle.read())

        result = document.xpath(search)
        if isinstance(result, list):
            for item in result:
                my_print(item, search)
        else:
            # Scalar result (string, number, boolean): print it directly
            # rather than trying to iterate over it.
            my_print(result, search)


if __name__ == '__main__':
    main()