#!/usr/bin/python
# -*- coding: iso-8859-1 -*-
# Script to get book info from HelMet Library database and bookplus.fi
# Net bookshop for Tellico. GPL-2
# Copyright (c) 2006 by Petri Damsten
import sys, os
import re
import urllib2
import time
import base64
import Image
# Language names looked for among the values of a HelMet record's
# 'Huomautus' field; the first one found becomes the book's language.
LANGUAGES = ['suomi', 'englanti']

# The ISBN to look up is the single, mandatory command-line argument.
if len(sys.argv) < 2:
    # Say why we are giving up instead of exiting silently.
    sys.stderr.write('Usage: pass the ISBN to look up as the first argument\n')
    sys.exit(1)
def clean(s):
    """Remove markup tags from s and trim surrounding whitespace.

    A tag is the shortest '<...>' run that fits on a single line; a '<'
    whose matching '>' is on a later line is left untouched.
    """
    without_tags = re.sub('<.*?>', '', s)
    return without_tags.strip()
def getPage(url, isbn):
    """Fetch a page and return its raw content.

    url  -- URL template; isbn is substituted into it with the % operator
    isbn -- value inserted into the template

    Errors raised by urllib2 while opening or reading the page propagate to
    the caller.
    """
    page = urllib2.urlopen(url % isbn)
    try:
        return page.read()
    finally:
        # Release the connection even when reading fails part-way.
        page.close()
def tryre(regex, s):
    """Return the first match of regex in s, stripped, or '' if there is none.

    The value is the first item re.findall() yields: the whole match, or the
    group when the pattern has exactly one.  If that item is not a string
    (a multi-group pattern yields tuples), or the search itself fails, ''
    is returned as well.
    """
    try:
        return re.findall(regex, s)[0].strip()
    except Exception:
        # Best-effort lookup: a missing field must not abort the scrape.
        # Unlike a bare "except:", this lets KeyboardInterrupt and
        # SystemExit through.
        return ''
def common(array1, array2):
    """Return the first item of array1 that also occurs in array2, or ''."""
    for candidate in array1:
        if candidate not in array2:
            continue
        return candidate
    return ''
def helmetDataMulti(content, key):
    """Return the lines of a multi-valued field of a HelMet record page.

    content -- HTML of the record page
    key     -- label text of the field; it is inserted into the pattern
               verbatim, so it must not contain regex metacharacters

    The value runs from the first 'bibInfoData">' after the label up to the
    next 'bibInfoLabel">'.  Markup is stripped from it and the remaining
    text is split on line breaks.  Returns [] when the label is not found.
    """
    # Only the first occurrence is wanted, so search() replaces
    # findall()[0]; a missing field is tested for explicitly rather than
    # through a bare "except:" that would also hide unrelated errors.
    match = re.search(r'(?ims)>' + key +
                      '<.*?bibInfoData">(.*?bibInfoLabel">)', content)
    if match is None:
        return []
    return re.split('\n+', clean(match.group(1)))
def helmetData(content, key):
    """Return the text of a single-valued field of a HelMet record page.

    content -- HTML of the record page
    key     -- label text of the field; it is inserted into the pattern
               verbatim, so it must not contain regex metacharacters

    Markup inside the value is stripped.  Returns '' when the label is not
    found.
    """
    # NOTE(review): in the source as received the lazy group "(.*?)" was the
    # last element of the pattern, so it always captured '' and every field
    # came back empty.  The terminator appears to have been lost; '</td>'
    # (the end of the data cell) is presumed here -- confirm against the
    # catalogue's markup.
    match = re.search(r'(?ims)>' + key +
                      '<.*?bibInfoData">(.*?)</td>', content)
    if match is None:
        return ''
    return clean(match.group(1))
def helmet(isbn):
    """Look an ISBN up in the HelMet library catalogue.

    Returns a dict with the keys author, title, subtitle, publisher,
    pub_year, language, pages, isbn, binding, keywords, comments, image and
    image_file.  keywords is a list of strings; every other value is a
    string that is '' when nothing was extracted.  binding is 'Hardback',
    'Paperback' or '?'; comments, image and image_file are always ''.
    """
    content = getPage(
        'http://www.helmet.fi/search*fin/?searchtype=i&searcharg=%s', isbn)

    # Fields that need further picking apart.
    title_field = helmetData(content, 'Teos')
    imprint = helmetData(content, 'Julktiedot')
    extent = helmetData(content, 'Ulkoasu')
    isbn_field = helmetData(content, 'ISBN')

    # The binding is an abbreviation in parentheses after the ISBN.
    binding_code = tryre(r'\((.*?)\)', isbn_field)
    bindings = {'sid.': 'Hardback', 'nid.': 'Paperback'}

    return {
        'author': helmetData(content, 'Tekijä'),
        # Title proper: everything before the first '/' or ':'.
        'title': tryre(r'(.*?)[/:]', title_field),
        'subtitle': helmetData(content, 'Alkuteos'),
        # Publisher sits between the last ':' and the following ','.
        'publisher': tryre(r'.*:(.*?),', imprint),
        'pub_year': tryre(r',\s*(\d+)', imprint),
        'language': common(helmetDataMulti(content, 'Huomautus'),
                           LANGUAGES),
        'pages': tryre(r'(\d+)', extent),
        'isbn': tryre(r'(.*?)\(', isbn_field),
        'binding': bindings.get(binding_code, '?'),
        'keywords': helmetDataMulti(content, 'Asiasana'),
        'comments': '',
        'image': '',
        'image_file': '',
    }
def bookplus(isbn):
    """Look an ISBN up in the bookplus.fi net bookshop.

    Returns a dict with the keys author, title, subtitle, publisher,
    pub_year, language, pages, isbn, binding, keywords, comments, image and
    image_file.  All values are strings, '' when nothing was extracted;
    binding is 'Paperback' or 'Hardback' (the default).  subtitle, keywords,
    comments, image and image_file are always ''.
    """
    url = 'http://www.bookplus.fi/product.php?isbn=%s'
    content = getPage(url, isbn)

    # NOTE(review): the patterns of this function arrived damaged -- the
    # markup they contained appears to have been stripped.  The pattern that
    # isolated the product card was reduced to a bare "(.*?)", which always
    # yields '', so the fields are searched for in the whole page instead.
    # Restore the original delimiters if they can be recovered.
    card = content

    def labelled(label):
        # Text after "<label>: " up to the next tag or line break.  The
        # received patterns ended in a literal line break inside the string
        # (a syntax error), presumably a stripped '<br>'.
        return tryre(r'(?ims)' + label + r': ([^<\r\n]*)', card)

    data = {}
    # The capture groups below had lost their terminators and always matched
    # ''; they now run up to the next tag (presumed -- verify against the
    # shop's markup).
    data['title'] = tryre(r'(?ims)greentitle13">([^<]*)', card)
    data['subtitle'] = ''
    data['keywords'] = ''
    data['author'] = tryre(r'(?ims)Author.*?>([^<]*)', card)
    data['publisher'] = tryre(r'(?ims)Publisher.*?>([^<]*)', card)
    data['pub_year'] = labelled('vuosi')
    data['language'] = labelled('Kieli')
    data['pages'] = labelled('Sivuja')
    data['isbn'] = labelled('ISBN')

    if labelled('Tuotemuoto').startswith('Pehmeä'):
        data['binding'] = 'Paperback'
    else:
        data['binding'] = 'Hardback'

    # NOTE(review): the received code matched a cover image here but never
    # used the result; no cover is retrieved.  These keys are still set
    # because combine() and printXML() read them.
    data['comments'] = ''
    data['image'] = ''
    data['image_file'] = ''
    return data
def printData(data):
    """Print a record to standard output as labelled plain-text lines.

    data -- dict with the keys author, title, subtitle, publisher, pub_year,
            language, pages, isbn, keywords, binding, comments and image.
            keywords may be a string or a list/tuple of strings.
    """
    keywords = data['keywords']
    if isinstance(keywords, (list, tuple)):
        # helmet() returns keywords as a list, which cannot be concatenated
        # to the label directly.
        keywords = ', '.join(keywords)

    separator = '-------------------------------------------'
    lines = [
        'Author : ' + data['author'],
        'Title : ' + data['title'],
        'Subtitle : ' + data['subtitle'],
        'Publisher : ' + data['publisher'],
        'Pub Year : ' + data['pub_year'],
        'Language : ' + data['language'],
        'Pages : ' + data['pages'],
        'ISBN : ' + data['isbn'],
        'Keywords : ' + keywords,
        'Binding : ' + data['binding'],
        separator,
        data['comments'],
        separator,
        data['image'],
    ]
    print('\n'.join(lines))
def printXML(data):
    """Print a record to standard output as a UTF-8 Tellico XML document.

    data -- dict with the keys title, subtitle, author, binding, publisher,
            pub_year, isbn, pages, language, keywords (iterable of strings),
            comments, image (text placed inside the image element) and
            image_file (file name below /tmp, also used as the image id).
            Text values are decoded from ISO-8859-1.

    The cover is only emitted when /tmp/<image_file> can be opened as an
    image.
    """
    # NOTE(review): every XML tag was missing from the output literals of
    # this function as received (the image line even had three format
    # arguments and no placeholder, a TypeError).  The element names below
    # are reconstructed from the dict keys and the Tellico XML format; the
    # DOCTYPE, syntaxVersion and collection attributes are presumed --
    # confirm against the Tellico version in use.
    image = None
    if data['image_file']:
        try:
            image = Image.open('/tmp/' + data['image_file'])
        except Exception:
            # An unreadable cover just means no cover in the output.
            image = None

    def text(value):
        return value.decode('iso-8859-1')

    out = [
        '<?xml version="1.0" encoding="UTF-8"?>\n',
        '<!DOCTYPE tellico PUBLIC "-//Robby Stephenson/DTD Tellico V9.0//EN"'
        ' "http://periapsis.org/tellico/dtd/v9/tellico.dtd">\n',
        '<tellico xmlns="http://periapsis.org/tellico/" syntaxVersion="9">\n',
        ' <collection title="My Books" type="2">\n',
        '  <entry id="1">\n',
        '   <title>%s</title>\n' % text(data['title']),
        '   <subtitle>%s</subtitle>\n' % text(data['subtitle']),
        '   <authors>\n',
        '    <author>%s</author>\n' % text(data['author']),
        '   </authors>\n',
        '   <binding>%s</binding>\n' % text(data['binding']),
        '   <publisher>%s</publisher>\n' % text(data['publisher']),
        '   <pub_year>%s</pub_year>\n' % data['pub_year'],
        '   <isbn>%s</isbn>\n' % data['isbn'],
        '   <pages>%s</pages>\n' % data['pages'],
        '   <languages>\n    <language>%s</language>\n   </languages>\n' %
        text(data['language']),
    ]

    if len(data['keywords']) > 0:
        out.append('   <keywords>\n')
        for word in data['keywords']:
            out.append('    <keyword>%s</keyword>\n' % text(word))
        out.append('   </keywords>\n')

    if image is not None:
        out.append('   <cover>%s</cover>\n' % data['image_file'])

    if data['comments'] != '':
        # The received literal was broken across two lines; an HTML line
        # break is presumed to have been the text being replaced.
        comment = re.sub(r'(?i)<br\s*/?>', '\n', data['comments'])
        comment = clean(comment)
        out.append('   <comments>%s</comments>\n' % text(comment))

    out.append('  </entry>\n')

    if image is not None:
        out.append('  <images>\n')
        out.append('   <image format="%s" width="%s" height="%s" id="%s">\n' %
                   (image.format or 'JPEG', image.size[0], image.size[1],
                    data['image_file']))
        out.append(data['image'] + '\n')
        out.append('   </image>\n')
        out.append('  </images>\n')

    out.append(' </collection>\n')
    out.append('</tellico>\n')
    print(''.join(out).encode('utf-8'))
def combine(data1, data2):
    """Merge two records, preferring data1 and filling its blanks from data2.

    The result has exactly the keys of data1.  A value of data1 that equals
    '' is replaced by data2's value for that key, or stays '' when data2 has
    no such key.
    """
    data = {}
    for key in data1:
        value = data1[key]
        if value == '':
            # data2 may lack the key altogether; that must not raise.
            value = data2.get(key, '')
        data[key] = value
    return data
def cleanIsbn(isbn):
    """Reduce an ISBN to its bare digits, keeping a final 'X' check digit.

    Hyphens, spaces and any other characters are removed.  An 'x' or 'X' is
    kept (as 'X') only when it is the last remaining character, where an
    ISBN-10 may carry it as its check digit.
    """
    kept = re.sub('[^0-9Xx]', '', isbn).upper()
    # Drop any X that is not in the final position.
    return re.sub('X(?=.)', '', kept)
# Look the ISBN up in both sources; library data takes precedence and the
# bookshop only fills in what the library record left blank.
isbn = cleanIsbn(sys.argv[1])
data1, data2 = helmet(isbn), bookplus(isbn)
data = combine(data1, data2)
printXML(data)