#!/usr/bin/python
# -*- coding: iso-8859-1 -*-
# Script to get book info from HelMet Library database and bookplus.fi
# Net bookshop for Tellico. GPL-2
# Copyright (c) 2006 by Petri Damsten
#
# This is a Python 2 script (urllib2, byte strings decoded explicitly).
#
# NOTE(review): several string literals below (the XML emitted by printXML
# and the tail of a few regular expressions) look as if markup was lost from
# them at some point.  They are kept exactly as found and flagged where they
# occur; restore them from the upstream script before relying on the output.

import sys
import os
import re
import urllib2
import time
import base64

import Image

# Language names looked for among the HelMet 'Huomautus' values.
LANGUAGES = ['suomi', 'englanti']

# Failures that mean "pattern not found / unusable input" in the regex
# helpers; anything else (e.g. KeyboardInterrupt) is allowed to propagate.
_MATCH_ERRORS = (IndexError, AttributeError, TypeError, re.error)

# The ISBN is the single mandatory command-line argument.
if len(sys.argv) < 2:
    sys.exit(1)


def clean(s):
    """Return s with every '<...>' span removed and outer whitespace stripped."""
    return re.sub('<.*?>', '', s).strip()


def getPage(url, isbn):
    """Fetch url (with isbn substituted for its %s) and return the body.

    The response is closed even when reading fails.
    """
    # For offline testing the original author read a saved page instead,
    # e.g. open("./kirjasto2.html") or open("./bookplus.html").
    page = urllib2.urlopen(url % isbn)
    try:
        return page.read()
    finally:
        page.close()


def tryre(regex, s):
    """Return the first re.findall match of regex in s, stripped.

    Returns '' when there is no match or the match cannot be stripped.
    """
    try:
        return re.findall(regex, s)[0].strip()
    except _MATCH_ERRORS:
        return ''


def common(array1, array2):
    """Return the first item of array1 that also occurs in array2, else ''."""
    for s in array1:
        if s in array2:
            return s
    return ''


def helmetDataMulti(content, key):
    """Return the cleaned value found for key in content, split into lines.

    The value is split on runs of newlines.  Returns [] when nothing matches.
    """
    try:
        s = clean(re.findall(r'(?ims)>' + key +
                             '<.*?bibInfoData">(.*?bibInfoLabel">)',
                             content)[0])
        return re.split('\n+', s)
    except _MATCH_ERRORS:
        return []


def helmetData(content, key):
    """Return the cleaned value found for key in content, or '' if absent."""
    # NOTE(review): as written the pattern ends in a lazy group with nothing
    # after it, so the group can only capture an empty string.  A closing
    # delimiter appears to be missing -- confirm against the upstream script.
    try:
        return clean(re.findall(r'(?ims)>' + key +
                                '<.*?bibInfoData">(.*?)', content)[0])
    except _MATCH_ERRORS:
        return ''


def helmet(isbn):
    """Look isbn up on www.helmet.fi and return the extracted fields as a dict."""
    data = {}
    url = 'http://www.helmet.fi/search*fin/?searchtype=i&searcharg=%s'
    content = getPage(url, isbn)

    data['author'] = helmetData(content, 'Tekijä')
    s = helmetData(content, 'Teos')
    data['title'] = tryre(r'(.*?)[/:]', s)
    data['subtitle'] = helmetData(content, 'Alkuteos')

    s = helmetData(content, 'Julktiedot')
    data['publisher'] = tryre(r'.*:(.*?),', s)
    data['pub_year'] = tryre(r',\s*(\d+)', s)

    data['language'] = common(helmetDataMulti(content, 'Huomautus'),
                              LANGUAGES)

    s = helmetData(content, 'Ulkoasu')
    data['pages'] = tryre(r'(\d+)', s)

    # The ISBN field carries the number followed by a parenthesised
    # binding code ('sid.' / 'nid.').
    s = helmetData(content, 'ISBN')
    data['isbn'] = tryre(r'(.*?)\(', s)
    s = tryre(r'\((.*?)\)', s)
    if s == 'sid.':
        data['binding'] = 'Hardback'
    elif s == 'nid.':
        data['binding'] = 'Paperback'
    else:
        data['binding'] = '?'

    data['keywords'] = helmetDataMulti(content, 'Asiasana')
    data['comments'] = ''
    data['image'] = ''
    data['image_file'] = ''
    return data


def bookplus(isbn):
    """Look isbn up on www.bookplus.fi and return the extracted fields as a dict.

    Unlike helmet(), the result has no 'comments', 'image' or 'image_file'
    keys.
    """
    data = {}
    url = 'http://www.bookplus.fi/product.php?isbn=%s'
    content = getPage(url, isbn)

    # NOTE(review): the patterns below that end in a bare '(.*?)' can only
    # capture an empty string, and the ones ending in '\n' had a literal line
    # break inside the quotes in the text this was rebuilt from (possibly a
    # line-break tag originally).  Delimiters seem to be missing -- confirm
    # against the upstream script.
    card = tryre(r'(?ims)' + '(.*?)', content)
    data['title'] = tryre(r'(?ims)greentitle13">(.*?)', card)
    data['subtitle'] = ''
    data['keywords'] = ''
    data['author'] = tryre(r'(?ims)Author.*?>(.*?)', card)
    data['publisher'] = tryre(r'(?ims)Publisher.*?>(.*?)', card)
    data['pub_year'] = tryre(r'(?ims)vuosi: (.*?)\n', card)
    data['language'] = tryre(r'(?ims)Kieli: (.*?)\n', card)
    data['pages'] = tryre(r'(?ims)Sivuja: (.*?)\n', card)
    data['isbn'] = tryre(r'(?ims)ISBN: (.*?)\n', card)

    binding = tryre(r'(?ims)Tuotemuoto: (.*?)\n', card)
    if binding[:6] == 'Pehmeä':
        data['binding'] = 'Paperback'
    else:
        data['binding'] = 'Hardback'

    # NOTE(review): the result is not used anywhere in this function as it
    # stands; kept so the image lookup can be wired back in.
    image = tryre(r'(?ims).*?' + '(.*?).*?', content)
    return data


def printData(data):
    """Print the fields of data in a plain, human-readable layout."""
    keywords = data['keywords']
    # helmet() supplies keywords as a list; join it so the concatenation
    # below cannot fail with a TypeError.
    if isinstance(keywords, (list, tuple)):
        keywords = ', '.join(keywords)

    print('Author : ' + data['author'])
    print('Title : ' + data['title'])
    print('Subtitle : ' + data['subtitle'])
    print('Publisher : ' + data['publisher'])
    print('Pub Year : ' + data['pub_year'])
    print('Language : ' + data['language'])
    print('Pages : ' + data['pages'])
    print('ISBN : ' + data['isbn'])
    print('Keywords : ' + keywords)
    print('Binding : ' + data['binding'])
    print('-------------------------------------------')
    print(data['comments'])
    print('-------------------------------------------')
    print(data['image'])


def printXML(data):
    """Assemble the output document for data and print it UTF-8 encoded.

    Text fields are decoded from iso-8859-1 first.  The image sections are
    emitted only when '/tmp/' + data['image_file'] can be opened as an image.
    """
    # NOTE(review): the literals below are reproduced as found; they contain
    # only whitespace and placeholders, so the element markup looks lost.
    try:
        i = Image.open('/tmp/' + data['image_file'])
    except Exception:
        # Best effort: any failure to load the image just omits it.
        i = None

    parts = [
        '\n',
        '\n',
        '\n',
        ' \n',
        ' \n',
        ' %s\n' % data['title'].decode('iso-8859-1'),
        ' %s\n' % data['subtitle'].decode('iso-8859-1'),
        ' \n',
        ' %s\n' % data['author'].decode('iso-8859-1'),
        ' \n',
        ' %s\n' % data['binding'].decode('iso-8859-1'),
        ' %s\n' % data['publisher'].decode('iso-8859-1'),
        ' %s\n' % data['pub_year'],
        ' %s\n' % data['isbn'],
        ' %s\n' % data['pages'],
        ' \n %s\n\n' % data['language'].decode('iso-8859-1'),
    ]

    if data['keywords']:
        parts.append(' \n')
        for word in data['keywords']:
            parts.append(' %s\n' % word.decode('iso-8859-1'))
        parts.append(' \n')

    if i is not None:
        parts.append(' %s\n' % data['image_file'])

    if data['comments'] != '':
        # NOTE(review): as found this replaces a newline with a newline (a
        # no-op); the search string was presumably a markup tag.
        comment = data['comments'].replace('\n', '\n')
        comment = clean(comment)
        parts.append(' %s\n' % comment.decode('iso-8859-1'))

    parts.append('\n\n')

    if i is not None:
        parts.append(' \n')
        # NOTE(review): this format string has no conversion specifiers for
        # its three arguments and raises TypeError if reached; the lost
        # markup must be restored before images can be emitted.
        parts.append(' \n' % (i.size[0], i.size[1], data['image_file']))
        parts.append(data['image'] + '\n')
        parts.append(' \n')
        parts.append(' \n')

    parts.append('\n\n')
    parts.append('\n\n')
    print(''.join(parts).encode('utf-8'))


def combine(data1, data2):
    """Merge two result dicts, preferring data1.

    For every key of data1, take data1's value unless it is '', in which
    case data2's value is used ('' when data2 lacks the key).
    """
    data = {}
    for key in data1:
        if data1[key] != '':
            data[key] = data1[key]
        else:
            # bookplus() does not provide every key helmet() does, so a
            # plain data2[key] would raise KeyError here.
            data[key] = data2.get(key, '')
    return data


def cleanIsbn(isbn):
    """Return isbn with every character other than 0-9 removed."""
    return re.sub('[^0-9]', '', isbn)


isbn = cleanIsbn(sys.argv[1])
data1 = helmet(isbn)
data2 = bookplus(isbn)
data = combine(data1, data2)
printXML(data)