[sword-app-changelog] SF.net SVN: sword-app:[454] sss/branches/sss-2/sss

SourceForge Headquarters 1320 Columbia Street Suite 310 San Diego, CA 92101 +1 (858) 422-6466

Revision: 454
          http://sword-app.svn.sourceforge.net/sword-app/?rev=454&view=rev
Author:   richard-jones
Date:     2012-01-19 15:50:28 +0000 (Thu, 19 Jan 2012)
Log Message:
-----------
Improve Statement handling and serialisation; RDF serialisation now supports passing in an existing RDF document, to which the statement information is added.

Modified Paths:
--------------
    sss/branches/sss-2/sss/core.py
    sss/branches/sss-2/sss/pylons_sword_controller.py
    sss/branches/sss-2/sss/repository.py
    sss/branches/sss-2/sss/webpy.py

Modified: sss/branches/sss-2/sss/core.py
===================================================================

--- sss/branches/sss-2/sss/core.py	2012-01-17 15:05:22 UTC (rev 453)
+++ sss/branches/sss-2/sss/core.py	2012-01-19 15:50:28 UTC (rev 454)
@@ -596,44 +596,12 @@
         self.error_code = None
         self.error = None
         self.receipt = None
-        
-# Operational SWORD Classes
-#############################################################################
-# Classes which carry out the grunt work of the SSS
-
-class SWORDSpec(object):
-    """
-    Class which attempts to represent the specification itself.  Instead of being operational like the SWORDServer
-    class, it attempts to just be able to interpret the supplied http headers and content bodies and turn them into
-    the entities with which SWORD works.  The jury is out, in my mind, whether this class is a useful separation, but
-    for what it's worth, here it is ...
-    """
-    def __init__(self, config):
-    
-        self.config = config
-    
-        # FIXME: this is a webpy thing ...
-        # The HTTP headers that are part of the specification (from a web.py perspective - don't be fooled, these
-        # aren't the real HTTP header names - see the spec)
-        self.sword_headers = [
-            "HTTP_ON_BEHALF_OF", "HTTP_PACKAGING", "HTTP_IN_PROGRESS", "HTTP_METADATA_RELEVANT",
-            "HTTP_CONTENT_MD5", "HTTP_SLUG", "HTTP_ACCEPT_PACKAGING"
-        ]
-
-        self.error_content_uri = "http://purl.org/net/sword/error/ErrorContent"
-        self.error_checksum_mismatch_uri = "http://purl.org/net/sword/error/ErrorChecksumMismatch"
-        self.error_bad_request_uri = "http://purl.org/net/sword/error/ErrorBadRequest"
-        self.error_target_owner_unknown_uri = "http://purl.org/net/sword/error/TargetOwnerUnknown"
-        self.error_mediation_not_allowed_uri = "http://purl.org/net/sword/error/MediationNotAllowed"
-        self.error_method_not_allowed_uri = "http://purl.org/net/sword/error/MethodNotAllowed"
-        self.error_max_upload_size_exceeded = "http://purl.org/net/sword/error/MaxUploadSizeExceeded"
-
-        
+                
 class Statement(object):
     """
     Class representing the Statement; a description of the object as it appears on the server
     """
-    def __init__(self):
+    def __init__(self, aggregation_uri=None, rem_uri=None, original_deposits=[], aggregates=[], states=[]):
         """
         The statement has 4 important properties:
         - aggregation_uri   -   The URI of the aggregation in ORE terms
@@ -642,22 +610,12 @@
         - in_progress       -   Is the submission in progress (boolean)
         - aggregates        -   the non-original deposit files associated with the item
         """
-        self.aggregation_uri = None
-        self.rem_uri = None
-        self.original_deposits = []
-        self.aggregates = []
-        self.in_progress = False
-
-        # URIs to use for the two supported states in SSS
-        self.in_progress_uri = "http://purl.org/net/sword/state/in-progress"
-        self.archived_uri = "http://purl.org/net/sword/state/archived"
-
-        # the descriptions to associated with the two supported states in SSS
-        self.states = {
-            self.in_progress_uri : "The work is currently in progress, and has not passed to a reviewer",
-            self.archived_uri : "The work has passed through review and is now in the archive"
-        }
-
+        self.aggregation_uri = aggregation_uri
+        self.rem_uri = rem_uri
+        self.original_deposits = original_deposits
+        self.aggregates = aggregates
+        self.states = states
+        
         # Namespace map for XML serialisation
         self.ns = Namespaces()
         self.smap = {"rdf" : self.ns.RDF_NS, "ore" : self.ns.ORE_NS, "sword" : self.ns.SWORD_NS}
@@ -666,7 +624,13 @@
 
     def __str__(self):
         return str(self.aggregation_uri) + ", " + str(self.rem_uri) + ", " + str(self.original_deposits)
+    
+    def add_state(self, state, state_description):
+        self.states.append((state, state_description))
         
+    def set_state(self, state, state_description):
+        self.states = [(state, state_description)]
+    
     def original_deposit(self, uri, deposit_time, packaging_format, by, obo):
         """
         Add an original deposit to the statement
@@ -691,6 +655,7 @@
         
         aggs = []
         ods = []
+        states = []
         for desc in rdf.getchildren():
             packaging = None
             depositedOn = None
@@ -707,7 +672,7 @@
                     self.rem_uri = about
                 if element.tag == self.ns.SWORD + "state":
                     state = element.get(self.ns.RDF + "resource")
-                    self.in_progress = state == "http://purl.org/net/sword/state/in-progress"
+                    states.append(state)
                 if element.tag == self.ns.SWORD + "packaging":
                     packaging = element.get(self.ns.RDF + "resource")
                 if element.tag == self.ns.SWORD + "depositedOn":
@@ -721,17 +686,26 @@
                 ods.append(about)
                 self.original_deposit(about, depositedOn, packaging, deposit_by, deposit_obo)
         
+        # now find the state descriptions
+        for desc in rdf.getchildren():
+            about = desc.get(self.ns.RDF + "about")
+            if about in states:
+                for element in desc.getchildren():
+                    if element.tag == self.ns.SWORD + "stateDescription":
+                        state_description = element.text
+                        self.add_state(about, state_description)
+        
         # sort out the ordinary aggregations from the original deposits
         self.aggregates = []
         for agg in aggs:
             if agg not in ods:
                 self.aggregates.append(agg)
 
-    def serialise(self):
+    def serialise_rdf(self, existing_rdf_as_string=None):
         """
         Serialise this statement into an RDF/XML string
         """
-        rdf = self.get_rdf_xml()
+        rdf = self.get_rdf_xml(existing_rdf_as_string)
         return etree.tostring(rdf, pretty_print=True)
 
     def serialise_atom(self):
@@ -742,11 +716,11 @@
         feed = etree.Element(self.ns.ATOM + "feed", nsmap=self.fmap)
 
         # create the sword:state term in the root of the feed
-        state_uri = self.in_progress_uri if self.in_progress else self.archived_uri
-        state = etree.SubElement(feed, self.ns.SWORD + "state")
-        state.set("href", state_uri)
-        meaning = etree.SubElement(state, self.ns.SWORD + "stateDescription")
-        meaning.text = self.states[state_uri]
+        for state_uri, state_description in self.states:
+            state = etree.SubElement(feed, self.ns.SWORD + "state")
+            state.set("href", state_uri)
+            meaning = etree.SubElement(state, self.ns.SWORD + "stateDescription")
+            meaning.text = state_description
 
         # now do an entry for each original deposit
         for (uri, datestamp, format_uri, by, obo) in self.original_deposits:
@@ -787,47 +761,138 @@
 
         return etree.tostring(feed, pretty_print=True)
 
-    def get_rdf_xml(self):
+    def _is_rem(self, rdf):
+        valid = True
+        
+        # does it meet the basic requirements of being a resource map, which 
+        # is to have an ore:describes and an ore:isDescribedBy
+        describes_uri = None
+        rem_uri = None
+        aggregation_uri = None
+        is_described_by_uris = []
+        for desc in rdf.findall(self.ns.RDF + "Description"):
+            # look for the describes tag
+            ore_desc = desc.find(self.ns.ORE + "describes")
+            if ore_desc is not None:
+                describes_uri = ore_desc.get(self.ns.RDF + "resource")
+                rem_uri = desc.get(self.ns.RDF + "about")
+            # look for the isDescribedBy tag
+            ore_idb = desc.findall(self.ns.ORE + "isDescribedBy")
+            if len(ore_idb) > 0:
+                aggregation_uri = desc.get(self.ns.RDF + "about")
+                for idb in ore_idb:
+                    is_described_by_uris.append(idb.get(self.ns.RDF + "resource"))
+        
+        # now check that all those uris tie up:
+        if describes_uri != aggregation_uri:
+            ssslog.info("Validation of Ore Statement failed; ore:describes URI does not match Aggregation URI: " +
+                        str(describes_uri) + " != " + str(aggregation_uri))
+            valid = False
+        if rem_uri not in is_described_by_uris:
+            ssslog.info("Validation of Ore Statement failed; Resource Map URI does not match one of ore:isDescribedBy URIs: " + 
+                        str(rem_uri) + " not in " + str(is_described_by_uris))
+            valid = False
+        
+        ssslog.info("Statement validation; was it a success? " + str(valid))
+        return valid
+
+    def _get_aggregation_element(self, rdf):
+        for desc in rdf.findall(self.ns.RDF + "Description"):
+            ore_idb = desc.findall(self.ns.ORE + "isDescribedBy")
+            if len(ore_idb) > 0:
+                return desc
+        return None
+
+    def _get_description_element(self, rdf, uri):
+        for desc in rdf.findall(self.ns.RDF + "Description"):
+            about = desc.get(self.ns.RDF + "about")
+            if about == uri:
+                return desc
+        return None
+
+    def get_rdf_xml(self, existing_rdf_as_string=None):
         """
         Get an lxml Element object back representing this statement
         """
 
-        # we want to create an ORE resource map, and also add on the sword specific bits for the original deposits and the state
+        # first parse in the existing rdf if necessary
+        rdf = None
+        aggregation = None
+        is_rem = False
+        if existing_rdf_as_string is not None:
+            rdf = etree.fromstring(existing_rdf_as_string)
+            is_rem = self._is_rem(rdf)
+            if is_rem:
+                aggregation = self._get_aggregation_element()
+            else:
+                aggregation = self._get_description_element(rdf, self.aggregation_uri)    
+        else:
+            # create the RDF root
+            rdf = etree.Element(self.ns.RDF + "RDF", nsmap=self.smap)
 
-        # create the RDF root
-        rdf = etree.Element(self.ns.RDF + "RDF", nsmap=self.smap)
+        # these operations ensure that an existing rdf document becomes a resource
+        # map
+        if not is_rem:
+            # in the RDF root create a Description for the REM which ore:describes the Aggregation
+            description1 = etree.SubElement(rdf, self.ns.RDF + "Description")
+            description1.set(self.ns.RDF + "about", self.rem_uri)
+            describes = etree.SubElement(description1, self.ns.ORE + "describes")
+            describes.set(self.ns.RDF + "resource", self.aggregation_uri)
 
-        # in the RDF root create a Description for the REM which ore:describes the Aggregation
-        description1 = etree.SubElement(rdf, self.ns.RDF + "Description")
-        description1.set(self.ns.RDF + "about", self.rem_uri)
-        describes = etree.SubElement(description1, self.ns.ORE + "describes")
-        describes.set(self.ns.RDF + "resource", self.aggregation_uri)
+        if aggregation is not None and not is_rem:
+            # there is already an rdf:Description for the element, but it hasn't
+            # been properly linked to the ReM yet
+            idb = etree.SubElement(aggregation, self.ns.ORE + "isDescribedBy")
+            idb.set(self.ns.RDF + "resource", self.rem_uri)
 
-        # in the RDF root create a Description for the Aggregation which is ore:isDescribedBy the REM
-        description = etree.SubElement(rdf, self.ns.RDF + "Description")
-        description.set(self.ns.RDF + "about", self.aggregation_uri)
-        idb = etree.SubElement(description, self.ns.ORE + "isDescribedBy")
-        idb.set(self.ns.RDF + "resource", self.rem_uri)
+        if aggregation is None:
+            # CREATE THE AGGREGATION
+            # in the RDF root create a Description for the Aggregation which is ore:isDescribedBy the REM
+            aggregation = etree.SubElement(rdf, self.ns.RDF + "Description")
+            aggregation.set(self.ns.RDF + "about", self.aggregation_uri)
+            idb = etree.SubElement(aggregation, self.ns.ORE + "isDescribedBy")
+            idb.set(self.ns.RDF + "resource", self.rem_uri)
 
+        # we want to create an ORE resource map, and also add on the sword specific bits for the original deposits and the state
+        
         # Create ore:aggreages for all ordinary aggregated files
+        # First build a list of all the urls which are already referred to in the existing rem
+        existing_a = []
+        existing_aggregates = aggregation.findall(self.ns.ORE + "aggregates")
+        for ea in existing_aggregates:
+            existing_a.append(ea.get(self.ns.RDF + "resource"))
         for uri in self.aggregates:
-            aggregates = etree.SubElement(description, self.ns.ORE + "aggregates")
+            if uri in existing_a:
+                continue
+            aggregates = etree.SubElement(aggregation, self.ns.ORE + "aggregates")
             aggregates.set(self.ns.RDF + "resource", uri)
+            existing_a.append(uri) # remember that we've added this aggregation, in case there are duplicates in original_deposits
 
         # Create ore:aggregates and sword:originalDeposit relations for the original deposits
+        existing_od = []
+        existing_ods = aggregation.findall(self.ns.SWORD + "originalDeposit")
+        for eo in existing_ods:
+            existing_od.append(eo.get(self.ns.RDF + "resource"))
         for (uri, datestamp, format, by, obo) in self.original_deposits:
             # standard ORE aggregates statement
-            aggregates = etree.SubElement(description, self.ns.ORE + "aggregates")
-            aggregates.set(self.ns.RDF + "resource", uri)
+            if uri not in existing_a:
+                aggregates = etree.SubElement(aggregation, self.ns.ORE + "aggregates")
+                aggregates.set(self.ns.RDF + "resource", uri)
 
             # assert that this is an original package
-            original = etree.SubElement(description, self.ns.SWORD + "originalDeposit")
-            original.set(self.ns.RDF + "resource", uri)
+            if uri not in existing_od:
+                original = etree.SubElement(aggregation, self.ns.SWORD + "originalDeposit")
+                original.set(self.ns.RDF + "resource", uri)
 
         # now do the state information
-        state_uri = self.in_progress_uri if self.in_progress else self.archived_uri
-        state = etree.SubElement(description, self.ns.SWORD + "state")
-        state.set(self.ns.RDF + "resource", state_uri)
+        for state_uri, state_description in self.states:
+            state = etree.SubElement(aggregation, self.ns.SWORD + "state")
+            state.set(self.ns.RDF + "resource", state_uri)
+            
+            sdesc = etree.SubElement(rdf, self.ns.RDF + "Description")
+            sdesc.set(self.ns.RDF + "about", state_uri)
+            meaning = etree.SubElement(sdesc, self.ns.SWORD + "stateDescription")
+            meaning.text = state_description
 
         # Build the Description elements for the original deposits, with their sword:depositedOn and sword:packaging
         # relations
@@ -851,12 +916,6 @@
                 deposit_obo.set(self.ns.RDF + "datatype", "http://www.w3.org/2001/XMLSchema#string")
                 deposit_obo.text = obo
 
-        # finally do a description for the state
-        sdesc = etree.SubElement(rdf, self.ns.RDF + "Description")
-        sdesc.set(self.ns.RDF + "about", state_uri)
-        meaning = etree.SubElement(sdesc, self.ns.SWORD + "stateDescription")
-        meaning.text = self.states[state_uri]
-
         return rdf
         
 class WebUI(object):

Modified: sss/branches/sss-2/sss/pylons_sword_controller.py
===================================================================
--- sss/branches/sss-2/sss/pylons_sword_controller.py	2012-01-17 15:05:22 UTC (rev 453)
+++ sss/branches/sss-2/sss/pylons_sword_controller.py	2012-01-19 15:50:28 UTC (rev 454)
@@ -4,7 +4,7 @@
 from pylons.templating import render_mako as render
 
 import re, base64, urllib, uuid, inspect
-from core import Auth, SWORDSpec, SwordError, AuthException, DepositRequest, DeleteRequest
+from core import Auth, SwordError, AuthException, DepositRequest, DeleteRequest
 from negotiator import ContentNegotiator, AcceptParameters, ContentType
 from spec import Errors, HttpHeaders, ValidationException
 

Modified: sss/branches/sss-2/sss/repository.py
===================================================================
--- sss/branches/sss-2/sss/repository.py	2012-01-17 15:05:22 UTC (rev 453)
+++ sss/branches/sss-2/sss/repository.py	2012-01-19 15:50:28 UTC (rev 454)
@@ -1,5 +1,5 @@
 import os, hashlib, uuid, urllib
-from core import Statement, DepositResponse, MediaResourceResponse, DeleteResponse, SWORDSpec, Auth, AuthException, SwordError, ServiceDocument, SDCollection, EntryDocument, Authenticator, SwordServer, WebUI
+from core import Statement, DepositResponse, MediaResourceResponse, DeleteResponse, Auth, AuthException, SwordError, ServiceDocument, SDCollection, EntryDocument, Authenticator, SwordServer, WebUI
 from spec import Namespaces, Errors
 from lxml import etree
 from datetime import datetime
@@ -153,7 +153,17 @@
 
         # create a URIManager for us to use
         self.um = URIManager(self.configuration)
+        
+        # URIs to use for the two supported states in SSS
+        self.in_progress_uri = "http://purl.org/net/sword/state/in-progress"
+        self.archived_uri = "http://purl.org/net/sword/state/archived"
 
+        # the descriptions associated with the two supported states in SSS
+        self.states = {
+            self.in_progress_uri : "The work is currently in progress, and has not passed to a reviewer",
+            self.archived_uri : "The work has passed through review and is now in the archive"
+        }
+
         # build the namespace maps that we will use during serialisation
         # self.sdmap = {None : self.ns.APP_NS, "sword" : self.ns.SWORD_NS, "atom" : self.ns.ATOM_NS, "dcterms" : self.ns.DC_NS}
         self.cmap = {None: self.ns.ATOM_NS}
@@ -314,7 +324,11 @@
 
         # the Edit-URI
         edit_uri = self.um.edit_uri(collection, id)
-
+        
+        # State information
+        state_uri = self.in_progress_uri if deposit.in_progress else self.archived_uri
+        state_description = self.states[state_uri]
+        
         # create the initial statement
         s = Statement()
         s.aggregation_uri = agg_uri
@@ -323,9 +337,9 @@
         obo = deposit.auth.obo if deposit.auth is not None else None
         if deposit_uri is not None:
             s.original_deposit(deposit_uri, datetime.now(), deposit.packaging, by, obo)
-        s.in_progress = deposit.in_progress
         s.aggregates = derived_resource_uris
-
+        s.add_state(state_uri, state_description)
+        
         # store the statement by itself
         self.dao.store_statement(collection, id, s)
 
@@ -464,6 +478,10 @@
 
         # the Edit-URI
         edit_uri = self.um.edit_uri(collection, id)
+        
+        # State information
+        state_uri = self.in_progress_uri if deposit.in_progress else self.archived_uri
+        state_description = self.states[state_uri]
 
         # create the new statement
         s = Statement()
@@ -473,7 +491,7 @@
             by = deposit.auth.by if deposit.auth is not None else None
             obo = deposit.auth.obo if deposit.auth is not None else None
             s.original_deposit(deposit_uri, datetime.now(), deposit.packaging, by, obo)
-        s.in_progress = deposit.in_progress
+        s.add_state(state_uri, state_description)
         s.aggregates = derived_resource_uris
 
         # store the statement by itself
@@ -527,11 +545,15 @@
         # the Edit-URI
         edit_uri = self.um.edit_uri(collection, id)
 
+        # State information
+        state_uri = self.in_progress_uri if delete.in_progress else self.archived_uri
+        state_description = self.states[state_uri]
+    
         # create the statement
         s = Statement()
         s.aggregation_uri = agg_uri
         s.rem_uri = edit_uri
-        s.in_progress = delete.in_progress
+        s.add_state(state_uri, state_description)
 
         # store the statement by itself
         self.dao.store_statement(collection, id, s)
@@ -567,9 +589,13 @@
         if not self.exists(oid):
             raise SwordError(status=404, empty=True)
 
+        # State information
+        state_uri = self.in_progress_uri if deposit.in_progress else self.archived_uri
+        state_description = self.states[state_uri]
+
         # load the statement
         s = self.dao.load_statement(collection, id)
-        s.in_progress = deposit.in_progress
+        s.set_state(state_uri, state_description)
         
         # store the content file if one exists, and do some processing on it
         location_uri = None
@@ -675,12 +701,16 @@
         if not self.exists(oid):
             raise SwordError(status=404, empty=True)
 
+        # State information
+        state_uri = self.in_progress_uri if deposit.in_progress else self.archived_uri
+        state_description = self.states[state_uri]
+
         # load the statement
         s = self.dao.load_statement(collection, id)
         
         # do the in-progress first, as some deposits will be empty, and will
         # just be telling us that the client has finished working on this item
-        s.in_progress = deposit.in_progress
+        s.set_state(state_uri, state_description)
         
         # just do some useful logging
         if deposit.atom is None and deposit.content is None:
@@ -1001,7 +1031,7 @@
         """ Store the supplied statement document content in the object idenfied by the id in the specified collection """
         # store the RDF version
         sfile = os.path.join(self.configuration.store_dir, collection, id, "sss_statement.xml")
-        self.save(sfile, statement.serialise())
+        self.save(sfile, statement.serialise_rdf())
         # store the Atom Feed version
         sfile = os.path.join(self.configuration.store_dir, collection, id, "sss_statement.atom.xml")
         self.save(sfile, statement.serialise_atom())

Modified: sss/branches/sss-2/sss/webpy.py
===================================================================
--- sss/branches/sss-2/sss/webpy.py	2012-01-17 15:05:22 UTC (rev 453)
+++ sss/branches/sss-2/sss/webpy.py	2012-01-19 15:50:28 UTC (rev 454)
@@ -1,6 +1,6 @@
 import web, re, base64, urllib, uuid
 from web.wsgiserver import CherryPyWSGIServer
-from core import Auth, SWORDSpec, SwordError, AuthException, DepositRequest, DeleteRequest
+from core import Auth, SwordError, AuthException, DepositRequest, DeleteRequest
 from negotiator import ContentNegotiator, AcceptParameters, ContentType
 from spec import Errors, HttpHeaders, ValidationException
 

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.