[Archivesspace_Users_Group] Indexing repository details in all records skews results set
Joshua D. Shaw
Joshua.D.Shaw at dartmouth.edu
Tue Jun 26 17:25:54 EDT 2018
Hi All-
I think this has been the behavior of AS from the beginning, but during some recent testing, I finally realized that AS is indexing the repository details with every record in the repository. Since part of our address is "6065 Webster Hall" and we have a *lot* of Daniel Webster-related material (he's a Dartmouth alum), searching for "webster" is a bad thing, since every record in the repo is listed. In a vanilla install, you can see the repository details in the json package in the results (result['json']), so that sort of made sense....
I've done some cooking of the indexer to remove the resolved repository details (result['json']['repository']['_resolved']) (and fiddled with some other things), but even though the json representation of the search results contains no instance of the search string, I *still* get results based on the repository details.
Example:
Repository Name is "rauner" and the long name is "Rauner Special Collections Library"
Search: "rauner"
Example results in JSON for a top container and an archival object are below. Note that these *do not* contain the string "rauner".
I must be missing something in how the indexer is actually storing and searching data. I'd love to know if someone has a method to remove the repository details (and anything else global) from the results to prevent this sort of thing and to cut down on erroneous results.
Thanks!
Joshua
TC:
{
"id": "/repositories/2/top_containers/53",
"uri": "/repositories/2/top_containers/53",
"title": "MS-1371b, Box 53",
"primary_type": "top_container",
"types": [
"top_container"
],
"json": "{\"lock_version\":38,\"indicator\":\"53\",\"created_by\":\"admin\",\"last_modified_by\":\"admin\",\"create_time\":\"2018-06-26T20:28:33Z\",\"system_mtime\":\"2018-06-26T21:11:11Z\",\"user_mtime\":\"2018-06-26T20:28:33Z\",\"type\":\"box\",\"jsonmodel_type\":\"top_container\",\"active_restrictions\":[],\"container_locations\":[],\"series\":[],\"collection\":[{\"ref\":\"/repositories/2/resources/1\",\"identifier\":\"MS-1371b\",\"display_string\":\"Mario Puzo papers\"}],\"uri\":\"/repositories/2/top_containers/53\",\"repository\":{\"ref\":\"/repositories/2\",\"_resolved\":\"\"},\"restricted\":false,\"is_linked_to_published_record\":false,\"display_string\":\"Box 53\",\"long_display_string\":\"MS-1371b, Box 53\"}",
"suppressed": false,
"publish": false,
"system_generated": false,
"repository": "/repositories/2",
"type_enum_s": [
"box"
],
"created_by": "admin",
"last_modified_by": "admin",
"user_mtime": "2018-06-26T20:28:33Z",
"system_mtime": "2018-06-26T21:11:11Z",
"create_time": "2018-06-26T20:28:33Z",
"display_string": "Box 53",
"collection_uri_u_sstr": [
"/repositories/2/resources/1"
],
"collection_display_string_u_sstr": [
"Mario Puzo papers"
],
"collection_identifier_stored_u_sstr": [
"MS-1371b"
],
"collection_identifier_u_stext": [
"MS-1371b",
"MS 1371b",
"MS1371b",
"MS- 1371 b"
],
"exported_u_sbool": [
false
],
"empty_u_sbool": [
false
],
"indicator_u_stext": [
"53"
],
"jsonmodel_type": "top_container"
}
AO:
{
"id": "/repositories/2/archival_objects/3",
"uri": "/repositories/2/archival_objects/3",
"title": "<emph render=\"italic\">The Fortunate Pilgrim</emph>",
"primary_type": "archival_object",
"types": [
"archival_object"
],
"json": "{\"lock_version\":0,\"position\":2,\"publish\":true,\"ref_id\":\"a97bf46cbc2cd85e9789c76098a3ee1b\",\"title\":\"<emph render=\\\"italic\\\">The Fortunate Pilgrim</emph>\",\"display_string\":\"<emph render=\\\"italic\\\">The Fortunate Pilgrim</emph>\",\"restrictions_apply\":false,\"created_by\":\"admin\",\"last_modified_by\":\"admin\",\"create_time\":\"2018-06-26T20:28:33Z\",\"system_mtime\":\"2018-06-26T21:11:11Z\",\"user_mtime\":\"2018-06-26T20:28:33Z\",\"suppressed\":false,\"level\":\"series\",\"jsonmodel_type\":\"archival_object\",\"external_ids\":[],\"subjects\":[],\"linked_events\":[],\"extents\":[],\"dates\":[],\"external_documents\":[],\"rights_statements\":[],\"linked_agents\":[],\"onbase_documents\":[],\"ancestors\":[{\"ref\":\"/repositories/2/resources/1\",\"level\":\"collection\"}],\"instances\":[],\"notes\":[],\"uri\":\"/repositories/2/archival_objects/3\",\"repository\":{\"ref\":\"/repositories/2\",\"_resolved\":\"\"},\"resource\":{\"ref\":\"/repositories/2/resources/1\"},\"has_unpublished_ancestor\":false,\"resource_identifier_u_sstr\":\"MS-1371b\",\"resource_type_u_sstr\":null,\"resource_title\":\"Mario Puzo papers\"}",
"suppressed": false,
"publish": false,
"system_generated": false,
"repository": "/repositories/2",
"level_enum_s": [
"series",
"collection"
],
"resource": "/repositories/2/resources/1",
"ref_id": "a97bf46cbc2cd85e9789c76098a3ee1b",
"created_by": "admin",
"last_modified_by": "admin",
"user_mtime": "2018-06-26T20:28:33Z",
"system_mtime": "2018-06-26T21:11:11Z",
"create_time": "2018-06-26T20:28:33Z",
"notes": "",
"level": "series",
"ancestors": [
"/repositories/2/resources/1"
],
"total_restrictions_u_sstr": [
"false"
],
"resource_identifier_u_sstr": [
"MS-1371b"
],
"resource_title_u_sstr": [
"Mario Puzo papers"
],
"resource_identifier_w_title_u_sstr": [
"MS-1371b: Mario Puzo papers"
],
"jsonmodel_type": "archival_object"
}
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lyralists.lyrasis.org/pipermail/archivesspace_users_group/attachments/20180626/f6ec87bf/attachment.html>
More information about the Archivesspace_Users_Group
mailing list