-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add json serialization of premis events Add a method on metsdata that handles the creation of events for mets Improve comments
- Loading branch information
1 parent
6b62121
commit db27675
Showing
1 changed file
with
110 additions
and
8 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,21 +1,123 @@ | ||
package main | ||
|
||
import ( | ||
"encoding/json" | ||
"strings" | ||
|
||
"github.com/beevik/etree" | ||
) | ||
|
||
type MetsData struct { | ||
Id int | ||
File string | ||
// Id int | ||
File string //mets-342432.xml | ||
Events []Event | ||
Agent string // e.g. Archivematica, a3m | ||
Agent string // the preservation system e.g. Archivematica, a3m | ||
EventCount int // e.g. len(events) | ||
SuccesCount int // e.g. the amount of event.outcome that are positive or pass | ||
} | ||
|
||
type Event struct { | ||
Id string `json:"id"` //uuid type of field taken from the mets | ||
OutcomeDetail string `json:detail` | ||
// Id string `json:"id"` //uuid type of field taken from the mets | ||
OutcomeDetail string `json:"outcomeDetail"` | ||
EventDetail string `json:"eventDetail"` | ||
Type string `json:"type"` //event type e.g. fixity check, creation | ||
ObjectName string `json` //premisObjectOrginalName | ||
ObjectName string `json:"name"` //premisObjectOrginalName | ||
Outcome bool `json:"outcome"` //can be empty, but this one is weird e.g. pass, Positive, etc. | ||
} | ||
|
||
// file_1,file_2,events_1,events_2,agent_1,agent_2,eventCount_1,eventCount_2,successCount_1,successCount_2 | ||
// mets-2349.xml,mets-3453.xml,{[1:{"id":"<uuid>","format": "excel", "type": "creation", "outcome": "pass"}]},{[1:{"id":"<uuid>","format": "excel", "type": "creation", "outcome": "pass"}]},Archivematica,a3m,1,1,1,1 | ||
type Conf struct { | ||
Exclude map[string]bool | ||
} | ||
|
||
// The complete paths for all the necessary items are known at | ||
// compile time So they can be laid out here. And it should be | ||
// allowable to change in the configuration, but I don't think this is | ||
// a priority feature. | ||
// Get all the amdSecs instead of searching from the root directly search | ||
// through the amdSecPath do this for each section. | ||
var amdSecPath = etree.MustCompilePath("//mets:amdSec") | ||
var eventSecPath = etree.MustCompilePath(".//premis:event") | ||
|
||
// I seperate the variables here to give further clarity as to their priority and | ||
// use. The path above is used as the roots for the paths below in my function | ||
// handle function. | ||
var ( | ||
objectNamePath = etree.MustCompilePath(".//premis:object/premis:originalName") | ||
eventTypePath = etree.MustCompilePath("./premis:eventType") | ||
eventAmountPath = etree.MustCompilePath("//premis:event/premis:eventType") | ||
// eventId = etree.MustCompilePath(".//premis:event/premis:eventIdentifierValue") | ||
agentPath = etree.MustCompilePath(".//premis:agent/premis:agentIdentifier/premis:agentIdentifierValue") | ||
eventDetailPath = etree.MustCompilePath("./premis:eventDetailInformation/eventDetail") | ||
outcomePath = etree.MustCompilePath("./premis:eventDetailInformation/eventOutcome") | ||
oDetailPath = etree.MustCompilePath("./premis:eventOutcomeDetail/eventOutcomeDetailNote") | ||
) | ||
|
||
func (md *MetsData) handleEvents(amdSec *etree.Element) { | ||
var agent string | ||
|
||
// There should only be one objectNameEle | ||
objectNameEle := amdSec.FindElementPath(objectNamePath) | ||
// There should only ever be one agent | ||
agentEles := amdSec.FindElementsPath(agentPath) | ||
|
||
// Get all events from amdSec | ||
prs := amdSec.FindElementsPath(eventSecPath) | ||
|
||
// Loop through all the elements in the amd section that have been given. | ||
for _, pr := range prs { | ||
event := &Event{ | ||
Type: pr.FindElementPath(eventTypePath).Text(), | ||
ObjectName: objectNameEle.Text(), | ||
} | ||
|
||
// Process event details, outcomes, and outcome details | ||
detailEle := pr.FindElementPath(eventDetailPath) | ||
outcomeEle := pr.FindElementPath(outcomePath) | ||
oDetailEle := pr.FindElementPath(oDetailPath) | ||
|
||
if detailEle != nil { | ||
event.EventDetail = detailEle.Text() | ||
} | ||
|
||
if outcomeEle != nil { | ||
outcomeText := strings.ToLower(outcomeEle.Text()) | ||
event.Outcome = strings.Contains(outcomeText, "pass") || | ||
strings.Contains(outcomeText, "positive") || | ||
strings.Contains(outcomeText, "transcribed") | ||
} | ||
|
||
if oDetailEle != nil { | ||
event.OutcomeDetail = oDetailEle.Text() | ||
} | ||
|
||
if event.Outcome { | ||
md.SuccesCount++ | ||
} | ||
|
||
// Append the event to the md.Events slice | ||
md.Events = append(md.Events, *event) | ||
} | ||
// TODO: Create a better abstraction that checks that the identifier | ||
// type is preservation system and then checks the value | ||
for _, agentElement := range agentEles { | ||
if strings.Contains(agentElement.Text(), "Archivematica") || | ||
strings.Contains(agentElement.Text(), "a3m") { | ||
agent = agentElement.Text() | ||
break | ||
} | ||
} | ||
md.Agent = agent | ||
} | ||
|
||
func serializeEvents(e []Event) ([][]byte, error) { | ||
jsd := make([][]byte, len(e)) | ||
var err error | ||
for i := 0; i < len(e); i++ { | ||
jsd[i], err = json.Marshal(e[i]) | ||
if err != nil { | ||
return jsd, err | ||
} | ||
|
||
} | ||
return jsd, nil | ||
} |