|
|
|
@ -33,7 +33,7 @@ Update (checked) = %{title}
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
import sys, os, re, md5, random, string
|
|
|
|
|
import urllib, urllib2, time, base64
|
|
|
|
|
import urllib.request, urllib.parse, urllib.error, time, base64
|
|
|
|
|
import xml.dom.minidom
|
|
|
|
|
|
|
|
|
|
XML_HEADER = """<?xml version="1.0" encoding="UTF-8"?>"""
|
|
|
|
@ -86,7 +86,7 @@ class BasicTellicoDOM:
|
|
|
|
|
entryNode.setAttribute('id', str(self.__currentId))
|
|
|
|
|
|
|
|
|
|
titleNode = self.__doc.createElement('title')
|
|
|
|
|
titleNode.appendChild(self.__doc.createTextNode(unicode(d['title'], 'latin-1').encode('utf-8')))
|
|
|
|
|
titleNode.appendChild(self.__doc.createTextNode(str(d['title'], 'latin-1').encode('utf-8')))
|
|
|
|
|
|
|
|
|
|
yearNode = self.__doc.createElement('pub_year')
|
|
|
|
|
yearNode.appendChild(self.__doc.createTextNode(d['pub_year']))
|
|
|
|
@ -101,25 +101,25 @@ class BasicTellicoDOM:
|
|
|
|
|
writersNode = self.__doc.createElement('writers')
|
|
|
|
|
for g in d['writer']:
|
|
|
|
|
writerNode = self.__doc.createElement('writer')
|
|
|
|
|
writerNode.appendChild(self.__doc.createTextNode(unicode(g, 'latin-1').encode('utf-8')))
|
|
|
|
|
writerNode.appendChild(self.__doc.createTextNode(str(g, 'latin-1').encode('utf-8')))
|
|
|
|
|
writersNode.appendChild(writerNode)
|
|
|
|
|
|
|
|
|
|
genresNode = self.__doc.createElement('genres')
|
|
|
|
|
for g in d['genre']:
|
|
|
|
|
genreNode = self.__doc.createElement('genre')
|
|
|
|
|
genreNode.appendChild(self.__doc.createTextNode(unicode(g, 'latin-1').encode('utf-8')))
|
|
|
|
|
genreNode.appendChild(self.__doc.createTextNode(str(g, 'latin-1').encode('utf-8')))
|
|
|
|
|
genresNode.appendChild(genreNode)
|
|
|
|
|
|
|
|
|
|
commentsNode = self.__doc.createElement('comments')
|
|
|
|
|
#for g in d['comments']:
|
|
|
|
|
# commentsNode.appendChild(self.__doc.createTextNode(unicode("%s\n\n" % g, 'latin-1').encode('utf-8')))
|
|
|
|
|
commentsData = string.join(d['comments'], '\n\n')
|
|
|
|
|
commentsNode.appendChild(self.__doc.createTextNode(unicode(commentsData, 'latin-1').encode('utf-8')))
|
|
|
|
|
commentsNode.appendChild(self.__doc.createTextNode(str(commentsData, 'latin-1').encode('utf-8')))
|
|
|
|
|
|
|
|
|
|
artistsNode = self.__doc.createElement('artists')
|
|
|
|
|
for k, v in d['artist'].iteritems():
|
|
|
|
|
for k, v in d['artist'].items():
|
|
|
|
|
artistNode = self.__doc.createElement('artist')
|
|
|
|
|
artistNode.appendChild(self.__doc.createTextNode(unicode(v, 'latin-1').encode('utf-8')))
|
|
|
|
|
artistNode.appendChild(self.__doc.createTextNode(str(v, 'latin-1').encode('utf-8')))
|
|
|
|
|
artistsNode.appendChild(artistNode)
|
|
|
|
|
|
|
|
|
|
pagesNode = self.__doc.createElement('pages')
|
|
|
|
@ -132,7 +132,7 @@ class BasicTellicoDOM:
|
|
|
|
|
imageNode = self.__doc.createElement('image')
|
|
|
|
|
imageNode.setAttribute('format', 'JPEG')
|
|
|
|
|
imageNode.setAttribute('id', d['image'][0])
|
|
|
|
|
imageNode.appendChild(self.__doc.createTextNode(unicode(d['image'][1], 'latin-1').encode('utf-8')))
|
|
|
|
|
imageNode.appendChild(self.__doc.createTextNode(str(d['image'][1], 'latin-1').encode('utf-8')))
|
|
|
|
|
|
|
|
|
|
coverNode = self.__doc.createElement('cover')
|
|
|
|
|
coverNode.appendChild(self.__doc.createTextNode(d['image'][0]))
|
|
|
|
@ -156,17 +156,17 @@ class BasicTellicoDOM:
|
|
|
|
|
Prints entry's XML content to stdout
|
|
|
|
|
"""
|
|
|
|
|
try:
|
|
|
|
|
print nEntry.toxml()
|
|
|
|
|
print(nEntry.toxml())
|
|
|
|
|
except:
|
|
|
|
|
print sys.stderr, "Error while outputing XML content from entry to Tellico"
|
|
|
|
|
print(sys.stderr, "Error while outputing XML content from entry to Tellico")
|
|
|
|
|
|
|
|
|
|
def printXMLTree(self):
|
|
|
|
|
"""
|
|
|
|
|
Outputs XML content to stdout
|
|
|
|
|
"""
|
|
|
|
|
self.__collection.appendChild(self.__images)
|
|
|
|
|
print XML_HEADER; print DOCTYPE
|
|
|
|
|
print self.__root.toxml()
|
|
|
|
|
print(XML_HEADER); print(DOCTYPE)
|
|
|
|
|
print(self.__root.toxml())
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class DarkHorseParser:
|
|
|
|
@ -193,7 +193,7 @@ class DarkHorseParser:
|
|
|
|
|
|
|
|
|
|
# Compile patterns objects
|
|
|
|
|
self.__regExpsPO = {}
|
|
|
|
|
for k, pattern in self.__regExps.iteritems():
|
|
|
|
|
for k, pattern in self.__regExps.items():
|
|
|
|
|
self.__regExpsPO[k] = re.compile(pattern)
|
|
|
|
|
|
|
|
|
|
self.__domTree = BasicTellicoDOM()
|
|
|
|
@ -211,7 +211,7 @@ class DarkHorseParser:
|
|
|
|
|
"""
|
|
|
|
|
Fetch HTML data from url
|
|
|
|
|
"""
|
|
|
|
|
u = urllib2.urlopen(url)
|
|
|
|
|
u = urllib.request.urlopen(url)
|
|
|
|
|
self.__data = u.read()
|
|
|
|
|
u.close()
|
|
|
|
|
|
|
|
|
@ -231,7 +231,7 @@ class DarkHorseParser:
|
|
|
|
|
The image is deleted if delete is True
|
|
|
|
|
"""
|
|
|
|
|
md5 = genMD5()
|
|
|
|
|
imObj = urllib2.urlopen(path.strip())
|
|
|
|
|
imObj = urllib.request.urlopen(path.strip())
|
|
|
|
|
img = imObj.read()
|
|
|
|
|
imObj.close()
|
|
|
|
|
imgPath = "/tmp/%s.jpeg" % md5
|
|
|
|
@ -240,7 +240,7 @@ class DarkHorseParser:
|
|
|
|
|
f.write(img)
|
|
|
|
|
f.close()
|
|
|
|
|
except:
|
|
|
|
|
print sys.stderr, "Error: could not write image into /tmp"
|
|
|
|
|
print(sys.stderr, "Error: could not write image into /tmp")
|
|
|
|
|
|
|
|
|
|
b64data = (md5 + '.jpeg', base64.encodestring(img))
|
|
|
|
|
|
|
|
|
@ -249,7 +249,7 @@ class DarkHorseParser:
|
|
|
|
|
try:
|
|
|
|
|
os.remove(imgPath)
|
|
|
|
|
except:
|
|
|
|
|
print sys.stderr, "Error: could not delete temporary image /tmp/%s.jpeg" % md5
|
|
|
|
|
print(sys.stderr, "Error: could not delete temporary image /tmp/%s.jpeg" % md5)
|
|
|
|
|
|
|
|
|
|
return b64data
|
|
|
|
|
|
|
|
|
@ -286,7 +286,7 @@ class DarkHorseParser:
|
|
|
|
|
data['image'] = b64img
|
|
|
|
|
data['pub_year'] = NULLSTRING
|
|
|
|
|
|
|
|
|
|
for name, po in self.__regExpsPO.iteritems():
|
|
|
|
|
for name, po in self.__regExpsPO.items():
|
|
|
|
|
data[name] = NULLSTRING
|
|
|
|
|
if name == 'desc':
|
|
|
|
|
matches[name] = re.findall(self.__regExps[name], self.__data, re.S | re.I)
|
|
|
|
@ -363,7 +363,7 @@ class DarkHorseParser:
|
|
|
|
|
if not len(title): return
|
|
|
|
|
|
|
|
|
|
self.__title = title
|
|
|
|
|
self.__getHTMLContent("%s%s" % (self.__baseURL, self.__searchURL % urllib.quote(self.__title)))
|
|
|
|
|
self.__getHTMLContent("%s%s" % (self.__baseURL, self.__searchURL % urllib.parse.quote(self.__title)))
|
|
|
|
|
|
|
|
|
|
# Get all links
|
|
|
|
|
links = self.__fetchMovieLinks()
|
|
|
|
@ -381,11 +381,11 @@ class DarkHorseParser:
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
def halt():
    """Print ``HALT.`` to stdout and terminate the process with exit status 0.

    Raises:
        SystemExit: always, with code 0 (raised by ``sys.exit``).
    """
    # Python 3 call form; the Python 2 ``print "HALT."`` statement is a
    # SyntaxError on Python 3 and must not be kept alongside it.
    print("HALT.")
    sys.exit(0)
|
|
|
|
|
|
|
|
|
|
def showUsage():
    """Print the command-line usage line to stdout and exit with status 1.

    The program name shown in the message is taken from ``sys.argv[0]``.

    Raises:
        SystemExit: always, with code 1 (raised by ``sys.exit``).
    """
    # Python 3 call form; the Python 2 print statement is a SyntaxError on
    # Python 3 and must not be kept alongside it.
    print("Usage: %s comic" % sys.argv[0])
    sys.exit(1)
|
|
|
|
|
|
|
|
|
|
def main():
|
|
|
|
|