Created
July 4, 2023 14:55
-
-
Save demiankatz/4600bdfb9af9882ad491f74c406a8a8a to your computer and use it in GitHub Desktop.
VuFind "Subject Guide" Recommendation Module
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/** | |
* Guide Recommendations Module | |
* | |
* PHP version 7 | |
* | |
* Copyright (C) Villanova University 2010. | |
* | |
* This program is free software; you can redistribute it and/or modify | |
* it under the terms of the GNU General Public License version 2, | |
* as published by the Free Software Foundation. | |
* | |
* This program is distributed in the hope that it will be useful, | |
* but WITHOUT ANY WARRANTY; without even the implied warranty of | |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
* GNU General Public License for more details. | |
* | |
* You should have received a copy of the GNU General Public License | |
* along with this program; if not, write to the Free Software | |
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
* | |
* @category VuFind | |
* @package Recommendations | |
* @author Demian Katz <[email protected]> | |
* @license http://opensource.org/licenses/gpl-2.0.php GNU General Public License | |
* @link http://vufind.org/wiki/vufind2:recommendation_modules Wiki | |
*/ | |
namespace VuFindVillanova\Recommend; | |
use VuFindSearch\Command\SearchCommand; | |
/** | |
* Guide Recommendations Module | |
* | |
* @category VuFind | |
* @package Recommendations | |
* @author Demian Katz <[email protected]> | |
* @license http://opensource.org/licenses/gpl-2.0.php GNU General Public License | |
* @link http://vufind.org/wiki/vufind2:recommendation_modules Wiki | |
*/ | |
class Guide implements \VuFind\Recommend\RecommendInterface | |
{ | |
/**
 * Facet fields to request from the main search (field name => display label)
 *
 * @var array
 */
protected $facets;

/**
 * Call number prefix => list of subject area names
 *
 * @var array
 */
protected $map;

/**
 * Recommended documents, grouped by category name
 *
 * @var array
 */
protected $results = [];

/**
 * Search service used to run the recommendation query
 *
 * @var \VuFindSearch\Service
 */
protected $backend;
/**
 * Constructor
 *
 * Stores the search service and fills in the two hard-coded lookup tables:
 * the facet fields to request, and the call number => subject area map.
 *
 * @param \VuFindSearch\Service $backend Search service
 */
public function __construct(\VuFindSearch\Service $backend)
{
    // Facet field => display label.
    $this->facets = [
        'callnumber-subject' => 'Call Number Area',
        'collection' => 'Collection',
    ];
    $this->backend = $backend;

    // Subject lists that are shared by several call number prefixes:
    $philosophy = ['Philosophy'];
    $theology = ['Theology and Religious Studies'];
    $history = ['History'];
    $geography = ['Geography and the Environment'];
    $sociology = ['Sociology'];
    $finance = ['Finance'];
    $romance = ['Romance Languages and Literatures'];
    $biology = ['Biology'];
    $engineering = ['Engineering'];
    $civil = ['Engineering', 'Civil and Environmental Engineering'];
    $artHistory = ['Art History'];
    $military = ['Naval and Military Sciences', 'History'];

    // NOTE: the order of the keys is kept exactly as originally written.
    $this->map = [
        'A' => ['Augustine and Culture Seminar'],
        'B' => $philosophy,
        'BC' => $philosophy,
        'BD' => $philosophy,
        'BF' => ['Psychology'],
        'BH' => $philosophy,
        'BJ' => ['Ethics'],
        'BL' => $theology,
        'BP' => $theology,
        'BM' => $theology,
        'BQ' => $theology,
        'BR' => $theology,
        'BS' => $theology,
        'BT' => $theology,
        'BV' => $theology,
        'BX' => $theology,
        //'C' => ['History'],
        'D' => $history,
        // "area studies" disabled on 4/15/19; it is better to only
        // recommend the History librarian for these, at least for now.
        //'DK' => ['History', 'Russian Studies'],
        //'DS' => ['History', 'East Asian Studies'],
        //'DT' => ['History', 'Africana Studies'],
        'E' => $history,
        'F' => $history,
        'G' => $geography,
        'GA' => $geography,
        'GB' => $geography,
        'GC' => $geography,
        'GE' => $geography,
        'GF' => $geography,
        'GN' => $sociology,
        'GR' => $sociology,
        'GT' => $sociology,
        'GV' => $sociology,
        'H' => $sociology,
        'HA' => ['Sociology', 'Economics and Statistics'],
        'HB' => ['Economics and Statistics'],
        'HC' => ['Economics and Statistics', 'History'],
        'HD' => ['Economics and Statistics', 'History'],
        'HE' => ['Economics and Statistics', 'Communication'],
        'HF' => ['Business'],
        'HG' => $finance,
        'HJ' => $finance,
        'HM' => $sociology,
        'HN' => ['Sociology', 'History'],
        'HQ' => ['Sociology', 'Gender and Womens Studies'],
        'HS' => $sociology,
        'HT' => ['Sociology', 'History'],
        'HV' => ['Criminology', 'History'],
        'HX' => ['Sociology', 'Philosophy', 'History'],
        'J' => ['Political Science', 'Legal Studies', 'History'],
        'K' => ['Legal Studies'],
        'L' => ['Education'],
        'M' => ['Music'],
        'N' => $artHistory,
        'P' => ['Communication', 'Philosophy'],
        'PA' => ['Classical Studies'],
        'PB' => ['Romance Languages and Literatures', 'Irish Studies'],
        'PC' => $romance,
        'PD' => $romance,
        'PE' => ['English'],
        'PF' => $romance,
        'PG' => ['Romance Languages and Literatures', 'Russian Studies'],
        'PH' => $romance,
        'PJ' => [
            'Romance Languages and Literatures',
            'Arab and Islamic Studies',
        ],
        'PK' => $romance,
        'PL' => [
            'Romance Languages and Literatures',
            'East Asian Studies',
            'Africana Studies',
        ],
        'PM' => $romance,
        'PN' => ['English', 'Theatre', 'Communication'],
        'PQ' => ['Romance Languages and Literatures', 'Theatre'],
        'PR' => ['English', 'Theatre'],
        'PS' => ['English', 'Theatre'],
        'PT' => $romance,
        // Disabled at request of RSSE, 4/24/19:
        //'PZ' => ['English'],
        'Q' => $biology,
        'QA' => ['Mathematical Sciences'],
        'QB' => ['Astronomy and Astrophysics'],
        'QC' => ['Physics'],
        'QD' => ['Chemistry'],
        'QE' => $engineering,
        'QH' => $biology,
        'QK' => $biology,
        'QL' => $biology,
        'QM' => ['Biology', 'Nursing'],
        'QN' => $biology,
        'QP' => ['Biology', 'Nursing'],
        'QR' => $biology,
        'R' => ['Nursing'],
        'RG' => ['Nursing', 'Gender and Womens Studies'],
        'S' => ['Geography and the Environment', 'Biology'],
        'T' => $engineering,
        'TA' => $civil,
        'TC' => $civil,
        'TD' => $civil,
        'TE' => $civil,
        'TF' => $civil,
        'TG' => $civil,
        'TH' => $civil,
        'TJ' => ['Engineering', 'Mechanical Engineering'],
        'TK' => ['Engineering', 'Electrical and Computer Engineering'],
        'TL' => $engineering,
        'TN' => ['Engineering', 'Mechanical Engineering'],
        'TP' => ['Engineering', 'Chemical Engineering'],
        'TR' => $artHistory,
        'TS' => ['Engineering', 'Business'],
        'TT' => $artHistory,
        'TX' => ['Sociology', 'Global Interdisciplinary Studies'],
        'U' => $military,
        'V' => $military,
        'Z' => ['History', 'English', 'Communication'],
        'ZA' => ['Communication'],
        // The key here doesn't matter, but we need to get "Digital Scholarship"
        // onto the subject list so queries can be matched up with the Digital
        // Scholarship librarian; there is no relevant LC subject heading here.
        'ZZZ-FAKE' => ['Digital Scholarship'],
    ];
}
/**
 * Accept the module configuration from searches.ini.
 *
 * The settings are ignored: this method performs no work.
 *
 * @param string $settings Settings from searches.ini.
 *
 * @return void
 */
public function setConfig($settings)
{
    // Intentionally empty; nothing is read from $settings.
}
/**
 * Called at the end of the Search Params objects' initFromRequest() method.
 *
 * Registers every facet field listed in $this->facets on the search
 * parameters. The request object is not consulted.
 *
 * @param \VuFind\Search\Base\Params $params  Search parameter object
 * @param \Laminas\StdLib\Parameters $request Parameter object representing user
 * request.
 *
 * @return void
 */
public function init($params, $request)
{
    // Enable each required facet (field name plus its display label):
    foreach (array_keys($this->facets) as $field) {
        $params->addFacet($field, $this->facets[$field]);
    }
}
/**
 * Called after the Search Results object has performed its main search.
 *
 * Reads the facet counts and the user's display query from the main results,
 * turns them into a query via getQueryFromFacetsAndInput(), runs that query
 * against the 'SolrWeb' backend and stores the first matching documents of
 * each category ('Guides' and 'Staff') in $this->results.
 *
 * @param \VuFind\Search\Base\Results $results Search results object
 *
 * @return void
 */
public function process($results)
{
    // Retrieve facets from Solr and the query string from the user (with
    // double quotes removed and surrounding whitespace trimmed):
    $facets = $results->getFacetList($this->facets, false);
    $lookfor = trim(
        str_replace('"', '', $results->getParams()->getDisplayQuery())
    );

    // Build the query object:
    $query = new \VuFindSearch\Query\Query(
        $this->getQueryFromFacetsAndInput($facets, $lookfor)
    );

    // Retrieve candidate documents. A separate variable is used so that the
    // $results parameter is not overwritten with an unrelated value.
    $command = new SearchCommand('SolrWeb', $query, 0, 20);
    $documents = $this->backend->invoke($command)->getResult();

    // Maximum number of documents to keep per category:
    $limit = ['Guides' => 3, 'Staff' => 1];
    foreach ($documents as $doc) {
        $category = $this->getCategory($doc);
        if (!$category) {
            continue;
        }
        // The two sides are computed separately on purpose: "<" binds more
        // tightly than "??", so writing "count(...) < $limit[$c] ?? 0" would
        // apply the fallback to the comparison result instead of the limit.
        $kept = count($this->results[$category] ?? []);
        $allowed = $limit[$category] ?? 0;
        if ($kept < $allowed) {
            $this->results[$category][] = $doc;
        }
    }
}
/**
 * Given a record driver, determine its category.
 *
 * 'Guides' takes priority over 'Staff' when a document carries both values.
 *
 * @param object $doc Document; must provide a getRawData() method returning
 * an array-like structure.
 *
 * @return string|false 'Guides' or 'Staff', or false when the document has
 * neither category (including when it has no category field at all).
 */
protected function getCategory($doc)
{
    // The category field may be missing or hold a single scalar value;
    // normalize it to an array so in_array() never receives a non-array.
    $categories = (array)($doc->getRawData()['category'] ?? []);
    foreach (['Guides', 'Staff'] as $candidate) {
        if (in_array($candidate, $categories, true)) {
            return $candidate;
        }
    }
    return false;
}
/**
 * Convert a facet list into a Solr query
 *
 * Subject areas are scored from three sources (call number facet counts,
 * selected collection facet counts, and a boost when the user's query text
 * appears in a subject area name); the three highest-scoring areas are then
 * turned into boosted subject/keywords clauses.
 *
 * @param array  $facets  Facets (as returned by the results' getFacetList())
 * @param string $lookfor Search terms
 *
 * @return string Query string; when no subject area scored at all, the
 * subject part is 'category:Disabled'.
 */
protected function getQueryFromFacetsAndInput($facets, $lookfor)
{
    $guides = $this->addCallNumberTotals(
        [],
        $facets['callnumber-subject']['list'] ?? []
    );
    $guides = $this->addCollectionTotals(
        $guides,
        $facets['collection']['list'] ?? []
    );
    $guides = $this->addQueryBoosts($guides, $lookfor);

    // Sort the list by total (highest first):
    arsort($guides);

    // Build a query from the (at most) three best subject areas:
    $subjects = [];
    foreach ($guides as $subject => $count) {
        if (count($subjects) >= 3) {
            break;
        }
        $subjects[] = 'subject:"' . $subject . '"^' . ($count * 10)
            . ' OR keywords:"' . $subject . '"^' . intval($count / 10);
    }
    $filter = '(subject_homepage_str:yes OR category:"Staff"^10000)';
    $queryPart = empty($subjects)
        ? 'category:Disabled' : '(' . implode(' OR ', $subjects) . ')';
    return $filter . ' AND ' . $queryPart;
}

/**
 * Map call number facet values to subject areas and add their counts.
 *
 * @param array $totals    Running totals (subject area => score)
 * @param array $facetList Facet entries, each with 'value' and 'count' keys
 *
 * @return array Updated totals
 */
protected function addCallNumberTotals($totals, $facetList)
{
    foreach ($facetList as $current) {
        $code = trim(substr($current['value'], 0, 2));
        // If the two-letter code is not represented, switch to single-letter:
        if (!isset($this->map[$code]) && strlen($code) > 1) {
            $code = substr($code, 0, 1);
        }
        foreach ((array)($this->map[$code] ?? []) as $guide) {
            $totals[$guide] = ($totals[$guide] ?? 0) + $current['count'];
        }
    }
    return $totals;
}

/**
 * Special case: add counts for DCDE-related collections.
 *
 * @param array $totals    Running totals (subject area => score)
 * @param array $facetList Facet entries, each with 'value' and 'count' keys
 *
 * @return array Updated totals
 */
protected function addCollectionTotals($totals, $facetList)
{
    $dcdeKey = 'Special Collections, University Archives, Digital Library';
    $dcdeCollections = [
        'Digital Library',
        'Special Collections',
        'University Archives',
    ];
    foreach ($facetList as $current) {
        if (in_array($current['value'], $dcdeCollections, true)) {
            $totals[$dcdeKey] = ($totals[$dcdeKey] ?? 0) + $current['count'];
        }
    }
    return $totals;
}

/**
 * Boost every subject area whose name contains the user's query.
 *
 * @param array  $totals  Running totals (subject area => score)
 * @param string $lookfor Search terms
 *
 * @return array Updated totals
 */
protected function addQueryBoosts($totals, $lookfor)
{
    // Nothing can match an empty query, so skip the scan entirely.
    if (empty($lookfor)) {
        return $totals;
    }
    // Special case: redirect "digital humanities" to "digital scholarship."
    if (strtolower($lookfor) === 'digital humanities') {
        $lookfor = 'digital scholarship';
    }
    $allSubjects = array_unique(
        array_merge_recursive(...array_values($this->map))
    );
    foreach ($allSubjects as $subject) {
        if (stristr($subject, $lookfor) === false) {
            continue;
        }
        // Give a strong boost to any subject area that exactly matches
        // the user's query, and a smaller boost when the query is a
        // substring of the subject area name.
        $ratio = strlen($lookfor) / strlen($subject);
        $score = $ratio >= 1 ? 1000000 : 100000 * $ratio;
        $totals[$subject] = ($totals[$subject] ?? 0) + (int)$score;
    }
    return $totals;
}
/** | |
* Get final results | |
* | |
* @return array | |
*/ | |
public function getResults() | |
{ | |
return $this->results; | |
} | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/** | |
* Guide recommendation module factory. | |
* | |
* PHP version 7 | |
* | |
* Copyright (C) Villanova University 2018. | |
* | |
* This program is free software; you can redistribute it and/or modify | |
* it under the terms of the GNU General Public License version 2, | |
* as published by the Free Software Foundation. | |
* | |
* This program is distributed in the hope that it will be useful, | |
* but WITHOUT ANY WARRANTY; without even the implied warranty of | |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
* GNU General Public License for more details. | |
* | |
* You should have received a copy of the GNU General Public License | |
* along with this program; if not, write to the Free Software | |
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
* | |
* @category VuFind | |
* @package Recommendations | |
* @author Demian Katz <[email protected]> | |
* @license http://opensource.org/licenses/gpl-2.0.php GNU General Public License | |
* @link https://vufind.org/wiki/development Wiki | |
*/ | |
namespace VuFindVillanova\Recommend; | |
use Interop\Container\ContainerInterface; | |
use Laminas\ServiceManager\Factory\FactoryInterface; | |
/**
 * Guide recommendation module factory.
 *
 * @category VuFind
 * @package  Recommendations
 * @author   Demian Katz <[email protected]>
 * @license  http://opensource.org/licenses/gpl-2.0.php GNU General Public License
 * @link     https://vufind.org/wiki/development Wiki
 */
class GuideFactory implements FactoryInterface
{
    /**
     * Create an object
     *
     * Builds the requested class, passing it the 'VuFindSearch\Service'
     * service fetched from the container as its only constructor argument.
     *
     * @param ContainerInterface $container     Service manager
     * @param string             $requestedName Service being created
     * @param null|array         $options       Extra options (optional)
     *
     * @return object
     *
     * @throws \Exception if any (non-empty) options are supplied.
     * @throws ServiceNotFoundException if unable to resolve the service.
     * @throws ServiceNotCreatedException if an exception is raised when
     * creating a service.
     * @throws ContainerException if any other error occurs
     */
    public function __invoke(
        ContainerInterface $container,
        $requestedName,
        array $options = null
    ) {
        // This factory accepts no options; only build when none were given.
        if (empty($options)) {
            $searchService = $container->get('VuFindSearch\Service');
            return new $requestedName($searchService);
        }
        throw new \Exception('Unexpected options sent to factory.');
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment