From: SVN c. m. f. t. SWORD-A. p. <swo...@li...> - 2011-12-28 14:09:25
|
Revision: 378 http://sword-app.svn.sourceforge.net/sword-app/?rev=378&view=rev Author: richard-jones Date: 2011-12-28 14:09:19 +0000 (Wed, 28 Dec 2011) Log Message: ----------- minor fixes to improve compliance to behaviours recommended by sword spec Modified Paths: -------------- sss/trunk/sss.py sss/trunk/sss_client_tools.py Modified: sss/trunk/sss.py =================================================================== --- sss/trunk/sss.py 2011-12-22 13:38:31 UTC (rev 377) +++ sss/trunk/sss.py 2011-12-28 14:09:19 UTC (rev 378) @@ -80,6 +80,8 @@ "http://purl.org/net/sword/package/SimpleZip" : DefaultIngester, "http://purl.org/net/sword/package/METSDSpaceSIP" : METSDSpaceIngester } + + self.entry_ingester = DefaultEntryIngester # supply this header in the Packaging header to generate a http://purl.org/net/sword/error/ErrorContent # sword error @@ -267,7 +269,7 @@ ss = SWORDServer() web.header("Content-Type", "text/xml") return ss.list_collection(collection) - + def POST(self, collection): """ POST either an Atom Multipart request, or a simple package into the specified collection @@ -328,6 +330,13 @@ web.ctx.status = result.error_code return result.error + + + + + + + class MediaResourceContent(SwordHttpHandler): """ Class to represent the content of the media resource. This is the object which appears under atom:content@src, not @@ -1306,6 +1315,7 @@ SWORDRequest.__init__(self) # content related + self.content_type = "application/octet-stream" self.content = None self.atom = None self.filename = "unnamed.file" @@ -1444,6 +1454,8 @@ return None def get_deposit(self, web, auth=None, atom_only=False): + # FIXME: this reads files into memory, and therefore does not scale + # FIXME: this does not deal with the Media Part headers on a multipart deposit """ Take a web.py web object and extract from it the parameters and content required for a SWORD deposit. This includes determining whether this is an Atom Multipart request or not, and extracting the atom/payload where @@ -1465,6 +1477,7 @@ d.filename = self.extract_filename(dict[head]) if head == "CONTENT_TYPE": ct = dict[head] + d.content_type = ct if ct.startswith("application/atom+xml"): atom_only = True @@ -1480,7 +1493,7 @@ d.content = base64.decodestring(webin['payload']) else: # if this wasn't a multipart, then the data is in web.data(). This could be a binary deposit or - # an atom entry deposit - reply on the passed argument to determine which + # an atom entry deposit - reply on the passed/determined argument to determine which if atom_only: d.atom = web.data() else: @@ -1665,7 +1678,9 @@ # store the incoming atom document if necessary if deposit.atom is not None: - self.dao.store_atom(collection, id, deposit.atom) + entry_ingester = self.configuration.entry_ingester() + entry_ingester.ingest(collection, id, deposit.atom) + # self.dao.store_atom(collection, id, deposit.atom) # store the content file if one exists, and do some processing on it deposit_uri = None @@ -2911,6 +2926,45 @@ # in the zip file pass +class DefaultEntryIngester(object): + def __init__(self): + self.dao = DAO() + self.ns = Namespaces() + + def ingest(self, collection, id, atom): + # store the atom + self.dao.store_atom(collection, id, atom) + + # now extract the metadata + metadata = {} + entry = etree.fromstring(atom) + + # go through each element in the atom entry and just process the ones we care about + # explicitly retrieve the atom based metadata first, then we'll overwrite it later with + # the dcterms metadata where appropriate + for element in entry.getchildren(): + if element.tag == self.ns.ATOM + "title": + metadata["title"] = element.text.strip() + if element.tag == self.ns.ATOM + "updated": + metadata["date"] = element.text.strip() + if element.tag == self.ns.ATOM + "author": + authors = "" + for names in element.getchildren(): + authors += names.text.strip() + " " + metadata["creator"] = authors + if element.tag == self.ns.ATOM + "summary": + metadata["abstract"] = element.text.strip() + + # now go through and retrieve the dcterms from the entry + for element in entry.getchildren(): + if not isinstance(element.tag, basestring): + continue + + if element.tag.startswith(self.ns.DC): + metadata[element.tag[len(self.ns.DC):]] = element.text.strip() + + self.dao.store_metadata(collection, id, metadata) + # WEB SERVER ####################################################################### # This is the bit which actually invokes the web.py server when this module is run Modified: sss/trunk/sss_client_tools.py =================================================================== --- sss/trunk/sss_client_tools.py 2011-12-22 13:38:31 UTC (rev 377) +++ sss/trunk/sss_client_tools.py 2011-12-28 14:09:19 UTC (rev 378) @@ -52,7 +52,7 @@ self.entry_doc = "entry.xml" if entry_doc is None else entry_doc self.binary_content_type = "application/zip" if binary_content_type is None else binary_content_type self.mime_boundary = "===============0670350989==" if mime_boundary is None else mime_boundary - self.package_format = "http://purl.org/net/sword/package/default" if package_format is None else package_format + self.package_format = "http://purl.org/net/sword/package/SimpleZip" if package_format is None else package_format self.checksum = "2b25f82ba67284461d4a481d7a06dd28" if checksum is None else checksum self.sd_id = sd_id # can be None self.accept = accept # can be None @@ -108,12 +108,13 @@ parts.append(self.auth_url(self.sd_uri)) return " ".join(parts) - def new_deposit(self, obo=False, in_progress=False, multipart=False, checksum=False, suppress_metadata=False): + def new_deposit(self, obo=False, in_progress=False, multipart=False, atom_only=False, checksum=False, suppress_metadata=False): parts = [self.cmd, self.post] - parts.append(self.file_upload(multipart=multipart)) - parts.append(self.get_content_disp(multipart=multipart)) - parts.append(self.get_content_type(multipart=multipart)) - parts.append(self.header(self.packaging, self.package_format)) + parts.append(self.file_upload(multipart=multipart, atom_only=atom_only)) + parts.append(self.get_content_disp(multipart=multipart, atom_only=atom_only)) + parts.append(self.get_content_type(multipart=multipart, atom_only=atom_only)) + if not atom_only: + parts.append(self.header(self.packaging, self.package_format)) if self.oid: parts.append(self.header(self.slug_header, self.oid)) if obo: @@ -261,6 +262,9 @@ # with a pre-prepared id print CURL(col_id=cid, oid=str(uuid.uuid4())).new_deposit() + + # Atom only + print CURL(col_id=cid, oid=str(uuid.uuid4())).new_deposit(atom_only=True) # LIST A COLLECTION ################### @@ -273,7 +277,7 @@ print CURL(col_id=cid, oid=oid).media_resource() accept = "application/zip" - package = "http://purl.org/net/sword/package/default" + package = "http://purl.org/net/sword/package/SimpleZip" print CURL(col_id=cid, oid=oid, accept=accept, package_format=package).media_resource(packaging=True) accept = "application/zip" This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |