Skip to content

Instantly share code, notes, and snippets.

@demiankatz
Created July 4, 2023 14:55
Show Gist options
  • Save demiankatz/4600bdfb9af9882ad491f74c406a8a8a to your computer and use it in GitHub Desktop.
Save demiankatz/4600bdfb9af9882ad491f74c406a8a8a to your computer and use it in GitHub Desktop.
VuFind "Subject Guide" Recommendation Module
<?php
/**
* Guide Recommendations Module
*
* PHP version 7
*
* Copyright (C) Villanova University 2010.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
* @category VuFind
* @package Recommendations
* @author Demian Katz <[email protected]>
* @license http://opensource.org/licenses/gpl-2.0.php GNU General Public License
* @link http://vufind.org/wiki/vufind2:recommendation_modules Wiki
*/
namespace VuFindVillanova\Recommend;
use VuFindSearch\Command\SearchCommand;
/**
 * Guide Recommendations Module
 *
 * Turns the call number and collection facets of the user's main search into
 * a weighted Solr query against the "SolrWeb" backend, and collects a small
 * number of matching subject guides and staff records as recommendations.
 *
 * @category VuFind
 * @package  Recommendations
 * @author   Demian Katz <[email protected]>
 * @license  http://opensource.org/licenses/gpl-2.0.php GNU General Public License
 * @link     http://vufind.org/wiki/vufind2:recommendation_modules Wiki
 */
class Guide implements \VuFind\Recommend\RecommendInterface
{
    /**
     * Facets to use in lookup (Solr field name => display label)
     *
     * @var array
     */
    protected $facets;

    /**
     * Call number prefix => list of subject area names
     *
     * @var array
     */
    protected $map;

    /**
     * Recommendation results, grouped by category name ('Guides' / 'Staff')
     *
     * @var array
     */
    protected $results = [];

    /**
     * Search service
     *
     * @var \VuFindSearch\Service
     */
    protected $backend;

    /**
     * Constructor
     *
     * Establishes base settings for making recommendations: the facets that
     * will be requested from the main search and the call number to subject
     * area map.
     *
     * @param \VuFindSearch\Service $backend Search service
     */
    public function __construct(\VuFindSearch\Service $backend)
    {
        $this->backend = $backend;
        $this->facets = [
            'callnumber-subject' => 'Call Number Area',
            'collection' => 'Collection',
        ];
        $this->map = [
            'A' => ['Augustine and Culture Seminar'],
            'B' => ['Philosophy'],
            'BC' => ['Philosophy'],
            'BD' => ['Philosophy'],
            'BF' => ['Psychology'],
            'BH' => ['Philosophy'],
            'BJ' => ['Ethics'],
            'BL' => ['Theology and Religious Studies'],
            'BP' => ['Theology and Religious Studies'],
            'BM' => ['Theology and Religious Studies'],
            'BQ' => ['Theology and Religious Studies'],
            'BR' => ['Theology and Religious Studies'],
            'BS' => ['Theology and Religious Studies'],
            'BT' => ['Theology and Religious Studies'],
            'BV' => ['Theology and Religious Studies'],
            'BX' => ['Theology and Religious Studies'],
            //'C' => ['History'],
            'D' => ['History'],
            // "area studies" disabled on 4/15/19; it is better to only
            // recommend the History librarian for these, at least for now.
            //'DK' => ['History', 'Russian Studies'],
            //'DS' => ['History', 'East Asian Studies'],
            //'DT' => ['History', 'Africana Studies'],
            'E' => ['History'],
            'F' => ['History'],
            'G' => ['Geography and the Environment'],
            'GA' => ['Geography and the Environment'],
            'GB' => ['Geography and the Environment'],
            'GC' => ['Geography and the Environment'],
            'GE' => ['Geography and the Environment'],
            'GF' => ['Geography and the Environment'],
            'GN' => ['Sociology'],
            'GR' => ['Sociology'],
            'GT' => ['Sociology'],
            'GV' => ['Sociology'],
            'H' => ['Sociology'],
            'HA' => ['Sociology', 'Economics and Statistics'],
            'HB' => ['Economics and Statistics'],
            'HC' => ['Economics and Statistics', 'History'],
            'HD' => ['Economics and Statistics', 'History'],
            'HE' => ['Economics and Statistics', 'Communication'],
            'HF' => ['Business'],
            'HG' => ['Finance'],
            'HJ' => ['Finance'],
            'HM' => ['Sociology'],
            'HN' => ['Sociology', 'History'],
            'HQ' => ['Sociology', 'Gender and Womens Studies'],
            'HS' => ['Sociology'],
            'HT' => ['Sociology', 'History'],
            'HV' => ['Criminology', 'History'],
            'HX' => ['Sociology', 'Philosophy', 'History'],
            'J' => ['Political Science', 'Legal Studies', 'History'],
            'K' => ['Legal Studies'],
            'L' => ['Education'],
            'M' => ['Music'],
            'N' => ['Art History'],
            'P' => ['Communication', 'Philosophy'],
            'PA' => ['Classical Studies'],
            'PB' => ['Romance Languages and Literatures', 'Irish Studies'],
            'PC' => ['Romance Languages and Literatures'],
            'PD' => ['Romance Languages and Literatures'],
            'PE' => ['English'],
            'PF' => ['Romance Languages and Literatures'],
            'PG' => ['Romance Languages and Literatures', 'Russian Studies'],
            'PH' => ['Romance Languages and Literatures'],
            'PJ' =>
                ['Romance Languages and Literatures', 'Arab and Islamic Studies'],
            'PK' => ['Romance Languages and Literatures'],
            'PL' => [
                'Romance Languages and Literatures', 'East Asian Studies',
                'Africana Studies',
            ],
            'PM' => ['Romance Languages and Literatures'],
            'PN' => ['English', 'Theatre', 'Communication'],
            'PQ' => ['Romance Languages and Literatures', 'Theatre'],
            'PR' => ['English', 'Theatre'],
            'PS' => ['English', 'Theatre'],
            'PT' => ['Romance Languages and Literatures'],
            // Disabled at request of RSSE, 4/24/19:
            //'PZ' => ['English'],
            'Q' => ['Biology'],
            'QA' => ['Mathematical Sciences'],
            'QB' => ['Astronomy and Astrophysics'],
            'QC' => ['Physics'],
            'QD' => ['Chemistry'],
            'QE' => ['Engineering'],
            'QH' => ['Biology'],
            'QK' => ['Biology'],
            'QL' => ['Biology'],
            'QM' => ['Biology', 'Nursing'],
            'QN' => ['Biology'],
            'QP' => ['Biology', 'Nursing'],
            'QR' => ['Biology'],
            'R' => ['Nursing'],
            'RG' => ['Nursing', 'Gender and Womens Studies'],
            'S' => ['Geography and the Environment', 'Biology'],
            'T' => ['Engineering'],
            'TA' => ['Engineering', 'Civil and Environmental Engineering'],
            'TC' => ['Engineering', 'Civil and Environmental Engineering'],
            'TD' => ['Engineering', 'Civil and Environmental Engineering'],
            'TE' => ['Engineering', 'Civil and Environmental Engineering'],
            'TF' => ['Engineering', 'Civil and Environmental Engineering'],
            'TG' => ['Engineering', 'Civil and Environmental Engineering'],
            'TH' => ['Engineering', 'Civil and Environmental Engineering'],
            'TJ' => ['Engineering', 'Mechanical Engineering'],
            'TK' => ['Engineering', 'Electrical and Computer Engineering'],
            'TL' => ['Engineering'],
            'TN' => ['Engineering', 'Mechanical Engineering'],
            'TP' => ['Engineering', 'Chemical Engineering'],
            'TR' => ['Art History'],
            'TS' => ['Engineering', 'Business'],
            'TT' => ['Art History'],
            'TX' => ['Sociology', 'Global Interdisciplinary Studies'],
            'U' => ['Naval and Military Sciences', 'History'],
            'V' => ['Naval and Military Sciences', 'History'],
            'Z' => ['History', 'English', 'Communication'],
            'ZA' => ['Communication'],
            // The key here doesn't matter, but we need to get "Digital Scholarship"
            // onto the subject list so queries can be matched up with the Digital
            // Scholarship librarian; there is no relevant LC subject heading here.
            'ZZZ-FAKE' => ['Digital Scholarship'],
        ];
    }

    /**
     * Store the configuration of the recommendation module.
     *
     * This module has no configurable settings, so the value is ignored.
     *
     * @param string $settings Settings from searches.ini.
     *
     * @return void
     */
    public function setConfig($settings)
    {
        // No action needed
    }

    /**
     * Called at the end of the Search Params objects' initFromRequest() method.
     * This method is responsible for setting search parameters needed by the
     * recommendation module and for reading any existing search parameters that may
     * be needed.
     *
     * Here it only registers the facets that process() later reads back.
     *
     * @param \VuFind\Search\Base\Params $params  Search parameter object
     * @param \Laminas\StdLib\Parameters $request Parameter object representing user
     * request (unused).
     *
     * @return void
     */
    public function init($params, $request)
    {
        // Turn on facets in the search results:
        foreach ($this->facets as $name => $desc) {
            $params->addFacet($name, $desc);
        }
    }

    /**
     * Called after the Search Results object has performed its main search. This
     * may be used to extract necessary information from the Search Results object
     * or to perform completely unrelated processing.
     *
     * Builds a query from the main search's facets and display query, runs it
     * against the "SolrWeb" backend (first 20 hits) and keeps at most three
     * 'Guides' records and one 'Staff' record.
     *
     * @param \VuFind\Search\Base\Results $results Search results object
     *
     * @return void
     */
    public function process($results)
    {
        // Retrieve facets from Solr and query string from user (quotes are
        // stripped so the text can be compared against subject names):
        $facets = $results->getFacetList($this->facets, false);
        $lookfor = trim(
            str_replace('"', '', $results->getParams()->getDisplayQuery())
        );

        // Build the query object:
        $query = new \VuFindSearch\Query\Query(
            $this->getQueryFromFacetsAndInput($facets, $lookfor)
        );

        // Retrieve candidate records:
        $command = new SearchCommand('SolrWeb', $query, 0, 20);
        $docs = $this->backend->invoke($command)->getResult();

        // Maximum number of records to keep per category:
        $limit = ['Guides' => 3, 'Staff' => 1];
        foreach ($docs as $doc) {
            $category = $this->getCategory($doc);
            if (!$category) {
                continue;
            }
            // Parentheses are required: "<" binds more tightly than "??", so
            // without them the fallback would apply to the comparison result.
            $found = count($this->results[$category] ?? []);
            if ($found < ($limit[$category] ?? 0)) {
                $this->results[$category][] = $doc;
            }
        }
    }

    /**
     * Given a record driver, determine its category.
     *
     * 'Guides' takes priority over 'Staff' when a record carries both values.
     *
     * @param object $doc Document (must provide getRawData())
     *
     * @return string|false 'Guides' or 'Staff'; false when the record's category
     * field is missing or contains neither value.
     */
    protected function getCategory($doc)
    {
        // The field may be absent or single-valued; normalize it to an array so
        // in_array() never receives null or a bare string.
        $categories = (array)($doc->getRawData()['category'] ?? []);
        foreach (['Guides', 'Staff'] as $candidate) {
            if (in_array($candidate, $categories, true)) {
                return $candidate;
            }
        }
        return false;
    }

    /**
     * Convert a facet list into a Solr query
     *
     * Subject areas are scored by summing the counts of the call number facet
     * values that map to them; the top three become boosted subject/keywords
     * clauses. When nothing scores, the query part is 'category:Disabled'.
     *
     * @param array  $facets  Facets
     * @param string $lookfor Search terms
     *
     * @return string
     */
    protected function getQueryFromFacetsAndInput($facets, $lookfor)
    {
        // Map facets to subject areas and build totals by area:
        $guides = [];
        foreach ($facets['callnumber-subject']['list'] ?? [] as $current) {
            $code = trim(substr($current['value'], 0, 2));
            // If the two-letter code is not represented, switch to single-letter:
            if (!isset($this->map[$code]) && strlen($code) > 1) {
                $code = substr($code, 0, 1);
            }
            foreach ((array)($this->map[$code] ?? []) as $guide) {
                $guides[$guide] = ($guides[$guide] ?? 0) + $current['count'];
            }
        }

        // Special case: Check for DCDE-related collections:
        $dcdeKey = 'Special Collections, University Archives, Digital Library';
        $dcdeCollections = [
            'Digital Library', 'Special Collections', 'University Archives',
        ];
        foreach ($facets['collection']['list'] ?? [] as $current) {
            if (in_array($current['value'], $dcdeCollections, true)) {
                $guides[$dcdeKey] = ($guides[$dcdeKey] ?? 0) + $current['count'];
            }
        }

        // Special case: redirect "digital humanities" to "digital scholarship."
        if (strtolower($lookfor) === 'digital humanities') {
            $lookfor = 'digital scholarship';
        }

        // If user query matches a subject area, boost that one:
        if (!empty($lookfor)) {
            // Collect the distinct subject names in first-seen order; the cast
            // mirrors the one used above so scalar map values are tolerated.
            $allSubjects = [];
            foreach ($this->map as $mapped) {
                foreach ((array)$mapped as $subject) {
                    $allSubjects[$subject] = true;
                }
            }
            foreach (array_keys($allSubjects) as $subject) {
                if (stripos($subject, $lookfor) === false) {
                    continue;
                }
                // Give a strong boost to any subject area that exactly matches
                // the user's query, and a smaller boost when the query is a
                // substring of the subject area name.
                $ratio = strlen($lookfor) / strlen($subject);
                $score = $ratio >= 1 ? 1000000 : 100000 * $ratio;
                $guides[$subject] = ($guides[$subject] ?? 0) + (int)$score;
            }
        }

        // Sort the list by total (highest first):
        arsort($guides);

        // Build a query from the three highest-scoring subjects:
        $subjects = [];
        foreach (array_slice($guides, 0, 3, true) as $subject => $count) {
            $subjects[] = 'subject:"' . $subject . '"^' . ($count * 10)
                . ' OR keywords:"' . $subject . '"^' . intval($count / 10);
        }
        $filter = '(subject_homepage_str:yes OR category:"Staff"^10000)';
        $queryPart = empty($subjects)
            ? 'category:Disabled' : '(' . implode(' OR ', $subjects) . ')';
        return $filter . ' AND ' . $queryPart;
    }

    /**
     * Get final results
     *
     * @return array Records collected by process(), keyed by category name
     */
    public function getResults()
    {
        return $this->results;
    }
}
<?php
/**
* Guide recommendation module factory.
*
* PHP version 7
*
* Copyright (C) Villanova University 2018.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
* @category VuFind
* @package Recommendations
* @author Demian Katz <[email protected]>
* @license http://opensource.org/licenses/gpl-2.0.php GNU General Public License
* @link https://vufind.org/wiki/development Wiki
*/
namespace VuFindVillanova\Recommend;
use Interop\Container\ContainerInterface;
use Laminas\ServiceManager\Factory\FactoryInterface;
/**
 * Guide recommendation module factory.
 *
 * Builds the requested recommendation class, injecting the search service
 * fetched from the container.
 *
 * @category VuFind
 * @package  Recommendations
 * @author   Demian Katz <[email protected]>
 * @license  http://opensource.org/licenses/gpl-2.0.php GNU General Public License
 * @link     https://vufind.org/wiki/development Wiki
 */
class GuideFactory implements FactoryInterface
{
    /**
     * Create an object
     *
     * @param ContainerInterface $container     Service manager
     * @param string             $requestedName Service being created
     * @param null|array         $options       Extra options (optional)
     *
     * @return object
     *
     * @throws ServiceNotFoundException if unable to resolve the service.
     * @throws ServiceNotCreatedException if an exception is raised when
     * creating a service.
     * @throws ContainerException if any other error occurs
     * @throws \Exception if any options are passed; none are supported.
     */
    public function __invoke(
        ContainerInterface $container,
        $requestedName,
        ?array $options = null
    ) {
        // Explicit "?array" replaces the implicitly nullable "array ... = null"
        // form, which is deprecated as of PHP 8.4; the accepted values are the
        // same.
        if (!empty($options)) {
            throw new \Exception('Unexpected options sent to factory.');
        }
        return new $requestedName($container->get('VuFindSearch\Service'));
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment