[sword-app-changelog] SF.net SVN: sword-app:[378] sss/trunk

SourceForge Headquarters 225 Broadway Suite 1600 San Diego, CA 92101 +1 (858) 454-5900

Revision: 378
          http://sword-app.svn.sourceforge.net/sword-app/?rev=378&view=rev
Author:   richard-jones
Date:     2011-12-28 14:09:19 +0000 (Wed, 28 Dec 2011)
Log Message:
-----------
minor fixes to improve compliance to behaviours recommended by sword spec

Modified Paths:
--------------
    sss/trunk/sss.py
    sss/trunk/sss_client_tools.py

Modified: sss/trunk/sss.py
===================================================================

--- sss/trunk/sss.py	2011-12-22 13:38:31 UTC (rev 377)
+++ sss/trunk/sss.py	2011-12-28 14:09:19 UTC (rev 378)
@@ -80,6 +80,8 @@
                 "http://purl.org/net/sword/package/SimpleZip" : DefaultIngester,
                 "http://purl.org/net/sword/package/METSDSpaceSIP" : METSDSpaceIngester
             }
+            
+        self.entry_ingester = DefaultEntryIngester
 
         # supply this header in the Packaging header to generate a http://purl.org/net/sword/error/ErrorContent
         # sword error
@@ -267,7 +269,7 @@
         ss = SWORDServer()
         web.header("Content-Type", "text/xml")
         return ss.list_collection(collection)
-
+        
     def POST(self, collection):
         """
         POST either an Atom Multipart request, or a simple package into the specified collection
@@ -328,6 +330,13 @@
             web.ctx.status = result.error_code
             return result.error
 
+
+
+
+
+
+
+
 class MediaResourceContent(SwordHttpHandler):
     """
     Class to represent the content of the media resource.  This is the object which appears under atom:content@src, not
@@ -1306,6 +1315,7 @@
         SWORDRequest.__init__(self)
 
         # content related
+        self.content_type = "application/octet-stream"
         self.content = None
         self.atom = None
         self.filename = "unnamed.file"
@@ -1444,6 +1454,8 @@
         return None
 
     def get_deposit(self, web, auth=None, atom_only=False):
+        # FIXME: this reads files into memory, and therefore does not scale
+        # FIXME: this does not deal with the Media Part headers on a multipart deposit
         """
         Take a web.py web object and extract from it the parameters and content required for a SWORD deposit.  This
         includes determining whether this is an Atom Multipart request or not, and extracting the atom/payload where
@@ -1465,6 +1477,7 @@
                 d.filename = self.extract_filename(dict[head])
             if head == "CONTENT_TYPE":
                 ct = dict[head]
+                d.content_type = ct
                 if ct.startswith("application/atom+xml"):
                     atom_only = True
 
@@ -1480,7 +1493,7 @@
             d.content = base64.decodestring(webin['payload'])
         else:
             # if this wasn't a multipart, then the data is in web.data().  This could be a binary deposit or
-            # an atom entry deposit - reply on the passed argument to determine which
+            # an atom entry deposit - rely on the passed/determined argument to determine which
             if atom_only:
                 d.atom = web.data()
             else:
@@ -1665,7 +1678,9 @@
 
         # store the incoming atom document if necessary
         if deposit.atom is not None:
-            self.dao.store_atom(collection, id, deposit.atom)
+            entry_ingester = self.configuration.entry_ingester()
+            entry_ingester.ingest(collection, id, deposit.atom)
+            # self.dao.store_atom(collection, id, deposit.atom)
 
         # store the content file if one exists, and do some processing on it
         deposit_uri = None
@@ -2911,6 +2926,45 @@
         # in the zip file
         pass
 
+class DefaultEntryIngester(object):
+    def __init__(self):
+        self.dao = DAO()
+        self.ns = Namespaces()
+        
+    def ingest(self, collection, id, atom):
+        # store the atom
+        self.dao.store_atom(collection, id, atom)
+        
+        # now extract the metadata
+        metadata = {}
+        entry = etree.fromstring(atom)
+
+        # go through each element in the atom entry and just process the ones we care about
+        # explicitly retrieve the atom based metadata first, then we'll overwrite it later with
+        # the dcterms metadata where appropriate
+        for element in entry.getchildren():
+            if element.tag == self.ns.ATOM + "title":
+                metadata["title"] = element.text.strip()
+            if element.tag == self.ns.ATOM + "updated":
+                metadata["date"] = element.text.strip()
+            if element.tag == self.ns.ATOM + "author":
+                authors = ""
+                for names in element.getchildren():
+                    authors += names.text.strip() + " "
+                metadata["creator"] = authors
+            if element.tag == self.ns.ATOM + "summary":
+                metadata["abstract"] = element.text.strip()
+
+        # now go through and retrieve the dcterms from the entry
+        for element in entry.getchildren():
+            if not isinstance(element.tag, basestring):
+                continue
+                
+            if element.tag.startswith(self.ns.DC):
+                metadata[element.tag[len(self.ns.DC):]] = element.text.strip()
+
+        self.dao.store_metadata(collection, id, metadata)
+
 # WEB SERVER
 #######################################################################
 # This is the bit which actually invokes the web.py server when this module is run

Modified: sss/trunk/sss_client_tools.py
===================================================================
--- sss/trunk/sss_client_tools.py	2011-12-22 13:38:31 UTC (rev 377)
+++ sss/trunk/sss_client_tools.py	2011-12-28 14:09:19 UTC (rev 378)
@@ -52,7 +52,7 @@
         self.entry_doc = "entry.xml" if entry_doc is None else entry_doc
         self.binary_content_type = "application/zip" if binary_content_type is None else binary_content_type
         self.mime_boundary = "===============0670350989==" if mime_boundary is None else mime_boundary
-        self.package_format = "http://purl.org/net/sword/package/default" if package_format is None else package_format
+        self.package_format = "http://purl.org/net/sword/package/SimpleZip" if package_format is None else package_format
         self.checksum = "2b25f82ba67284461d4a481d7a06dd28" if checksum is None else checksum
         self.sd_id = sd_id # can be None
         self.accept = accept # can be None
@@ -108,12 +108,13 @@
         parts.append(self.auth_url(self.sd_uri))
         return " ".join(parts)
 
-    def new_deposit(self, obo=False, in_progress=False, multipart=False, checksum=False, suppress_metadata=False):
+    def new_deposit(self, obo=False, in_progress=False, multipart=False, atom_only=False, checksum=False, suppress_metadata=False):
         parts = [self.cmd, self.post]
-        parts.append(self.file_upload(multipart=multipart))
-        parts.append(self.get_content_disp(multipart=multipart))
-        parts.append(self.get_content_type(multipart=multipart))
-        parts.append(self.header(self.packaging, self.package_format))
+        parts.append(self.file_upload(multipart=multipart, atom_only=atom_only))
+        parts.append(self.get_content_disp(multipart=multipart, atom_only=atom_only))
+        parts.append(self.get_content_type(multipart=multipart, atom_only=atom_only))
+        if not atom_only:
+            parts.append(self.header(self.packaging, self.package_format))
         if self.oid:
             parts.append(self.header(self.slug_header, self.oid))
         if obo:
@@ -261,6 +262,9 @@
 
     # with a pre-prepared id
     print CURL(col_id=cid, oid=str(uuid.uuid4())).new_deposit()
+    
+    # Atom only
+    print CURL(col_id=cid, oid=str(uuid.uuid4())).new_deposit(atom_only=True)
 
     # LIST A COLLECTION
     ###################
@@ -273,7 +277,7 @@
     print CURL(col_id=cid, oid=oid).media_resource()
 
     accept = "application/zip"
-    package = "http://purl.org/net/sword/package/default"
+    package = "http://purl.org/net/sword/package/SimpleZip"
     print CURL(col_id=cid, oid=oid, accept=accept, package_format=package).media_resource(packaging=True)
 
     accept = "application/zip"

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.