Skip to content

Instantly share code, notes, and snippets.

@sritasngh
Last active August 5, 2021 13:45
Show Gist options
  • Save sritasngh/765c55917be0cf29b67bfd17974721ca to your computer and use it in GitHub Desktop.
Save sritasngh/765c55917be0cf29b67bfd17974721ca to your computer and use it in GitHub Desktop.
# Identify the distribution (e.g. "Ubuntu") and its release codename.
DISTRO=$(lsb_release --id --short)
CODENAME=$(lsb_release --codename --short)

########################################################################
# Runtime dependencies of the scancode agent.
#
# NOTE(review): $YesOpt is not assigned anywhere in this fragment; it is
# presumably set by the surrounding script's option parsing -- confirm.
# It is intentionally left unquoted so that an empty value expands to
# no argument at all.
if [[ true ]]; then
  echo "*** Installing $DISTRO runtime dependencies ***";

  # Pick the package manager and package names matching the distribution.
  case "$DISTRO" in
    Debian|Ubuntu)
      apt-get $YesOpt install \
        python3 python3-pip \
        python3-dev libbz2-1.0 xz-utils zlib1g libxml2-dev libxslt1-dev libpopt0
      ;;
    Fedora)
      yum $YesOpt install \
        python3 python3-pip \
        python3-devel xz-libs zlib libxml2-devel libxslt-devel bzip2-libs libpopt0
      ;;
    RedHatEnterprise*|CentOS)
      yum $YesOpt install epel-release;
      yum $YesOpt install \
        rh-python3 python3-pip \
        python3-devel zlib bzip2-libs xz-libs libxml2-devel libxslt-devel libpopt0
      ;;
    *)
      echo "ERROR: Unknown or Unsupported $DISTRO $CODENAME release, please report to the mailing list"
      exit 1
      ;;
  esac

  # Install the Python tooling first, then scancode-toolkit itself, into
  # the per-user site-packages of the "fossy" account.
  for pip_packages in "pip setuptools wheel" "scancode-toolkit"; do
    su fossy -c "python3 -m pip install --upgrade --user $pip_packages"
  done
fi
# Makefile of the "scancode" module.
#
# Delegates the real work to the sub-directories listed in DIRS and adds
# the module-level install/uninstall steps (VERSION file, module config,
# mods-enabled symlink, fo-scancode program).

# Path to the directory holding Makefile.conf, which is included below.
TOP = ../..
VARS = $(TOP)/Makefile.conf
include $(VARS)
# Module name; used for the install paths and the VERSION file.
MOD_NAME = scancode
# Sub-directories visited by DIR_LOOP.
DIRS = ui agent
# NOTE(review): TESTDIR is empty, so in the test/coverage recipes "-C"
# would take the following word ("test"/"coverage") as its directory
# argument -- confirm the intended test directory.
TESTDIR =
# DIR_LOOP(target): run "$(MAKE) -s -C <dir> <target>" for every entry of
# DIRS; "set -e" makes the loop stop at the first failing sub-make.
DIR_LOOP = @set -e; for dir in $(DIRS); do $(MAKE) -s -C $$dir $(1); done
# Default target: write the VERSION file, then build every sub-directory
# with its default target (empty DIR_LOOP argument).
all: VERSIONFILE scancode-all
$(call DIR_LOOP, )
# Run the test target inside TESTDIR after a full build.
test: all
$(MAKE) -C $(TESTDIR) test
# Run the coverage target inside TESTDIR after a full build.
coverage: all
$(MAKE) -C $(TESTDIR) coverage
# Generate the VERSION file via the WriteVERSIONFile macro
# (not defined in this file; presumably provided by Makefile.conf).
VERSIONFILE:
$(call WriteVERSIONFile,$(MOD_NAME))
# Install the sub-directories, then VERSION, <module>.conf, the
# mods-enabled symlink (only if it does not exist yet) and fo-scancode.
install: all
$(call DIR_LOOP,install)
$(INSTALL_DATA) VERSION $(DESTDIR)$(MODDIR)/$(MOD_NAME)/VERSION
$(INSTALL_DATA) $(MOD_NAME).conf $(DESTDIR)$(MODDIR)/$(MOD_NAME)/$(MOD_NAME).conf
mkdir -p $(DESTDIR)$(SYSCONFDIR)/mods-enabled
if test ! -e $(DESTDIR)$(SYSCONFDIR)/mods-enabled/$(MOD_NAME); then \
ln -s $(MODDIR)/$(MOD_NAME) $(DESTDIR)$(SYSCONFDIR)/mods-enabled; \
fi
$(INSTALL_PROGRAM) fo-scancode $(DESTDIR)$(LIBEXECDIR)/fo-scancode
# Uninstall the sub-directories, then remove the installed module
# directory and its mods-enabled entry.
uninstall:
$(call DIR_LOOP,uninstall)
rm -rf $(DESTDIR)$(MODDIR)/$(MOD_NAME)
rm -f $(DESTDIR)$(SYSCONFDIR)/mods-enabled/$(MOD_NAME)
# Clean the sub-directories and delete the generated VERSION file.
clean:
$(call DIR_LOOP,clean)
rm -f VERSION
# None of the targets below name real files.
.PHONY: all test coverage VERSIONFILE install uninstall clean
.PHONY: scancode-all scancode-install scancode-uninstall scancode-clean scancode-Makefile
/*****************************************************************************
* SPDX-License-Identifier: GPL-2.0
* SPDX-FileCopyrightText: 2021 Sarita Singh <[email protected]>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* version 2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
****************************************************************************/
#include "scancode_wrapper.hpp"
#include "scancode_utils.hpp"
#include <boost/tokenizer.hpp>
#include <iostream>
#include<fstream>
#define MINSCORE 50
/**
 * @brief Converts the start line of a match into its start byte offset
 *
 * Skips the first (start_line - 1) lines of the file, records the byte
 * offset at which line start_line begins and adds the position of the
 * first occurrence of match_text within that line.
 *
 * @param filename   name of the file uploaded
 * @param start_line 1-based start line of the matched text reported by scancode
 * @param match_text text in the code file matched by scancode; it is searched
 *                   in a single line only, so it must not contain a newline
 * @return start byte of the matched text on success, -1 (converted to
 *         unsigned, i.e. the largest unsigned value) on failure: the file
 *         cannot be opened, start_line is 0 or beyond the end of the file,
 *         or match_text does not occur in that line
 */
unsigned getFilePointer(const string &filename, size_t start_line,
                        const string &match_text) {
  const unsigned kFailure = static_cast<unsigned>(-1);

  // Line numbers are 1-based; 0 would make "start_line - 1" wrap around.
  if (start_line == 0) {
    return kFailure;
  }

  ifstream checkfile(filename);
  if (!checkfile.is_open()) {
    return kFailure;
  }

  // Consume every line in front of the requested one.
  string line;
  for (size_t current = 1; current < start_line; ++current) {
    if (!getline(checkfile, line)) {
      return kFailure;  // the file has fewer lines than start_line
    }
  }

  // Byte offset of the first character of the requested line.
  const streamoff line_offset = checkfile.tellg();
  if (line_offset < 0 || !getline(checkfile, line)) {
    return kFailure;
  }

  // Keep the full width of size_t: narrowing the result of find() would
  // make the comparison against string::npos always succeed.
  const size_t column = line.find(match_text);
  if (column == string::npos) {
    return kFailure;
  }
  return static_cast<unsigned>(line_offset + static_cast<streamoff>(column));
}
/**
 * @brief Scan a file with scancode-toolkit
 *
 * Runs the scancode command line tool with a custom output template:
 *   scancode -lc --custom-output <output> --custom-template scancode_template.html <input> --license-text
 *   -l flag scans for licenses
 *   -c flag scans for copyrights and holders
 * The license score in ScanCode is a percentage and the copyright holder
 * in ScanCode is the author in FOSSology. The custom template restricts
 * the output to the information the user wants to see.
 *
 * The output of the command is read from its standard output and everything
 * in front of the first '{' is discarded, so that the returned text starts
 * with the JSON document.
 *
 * @param state an object of class State which can provide agent Id and CliOptions
 * @param file  code/binary file sent by scheduler
 * @return scanned data output on success, an empty string if the output
 *         contains no '{'; bail(1) is called if the command cannot be
 *         started or exits with a non-zero status
 */
string scanFileWithScancode(const State &state, const fo::File &file) {
  const string cliOptions = state.getCliOptions();

  // License scans additionally request the matched text and a minimum score.
  const string licenseArguments =
      (cliOptions.find('l') != string::npos)
          ? " --license-text --license-score " + to_string(MINSCORE)
          : "";

  const string command =
      "/home/fossy/.local/bin/scancode -" + cliOptions +
      " --custom-output - --custom-template scancode_template.html " +
      file.getFileName() + " --quiet " + licenseArguments;

  FILE *in = popen(command.c_str(), "r");
  if (!in) {
    cout << "could not execute scancode command: " << command << endl;
    bail(1);
    // Never read from a null stream, even if bail() were to return.
    return string();
  }

  // Collect the complete standard output of the command.
  string result;
  char buffer[512];
  while (fgets(buffer, sizeof(buffer), in) != nullptr) {
    result += buffer;
  }

  if (pclose(in) != 0) {
    cout << "could not execute scancode command: " << command << endl;
    bail(1);
  }

  // Keep the full width of size_t: a narrowed npos would be passed to
  // substr() as an out-of-range position and make it throw.
  const size_t startjson = result.find('{');
  if (startjson == string::npos) {
    return string();
  }
  return result.substr(startjson);
}
/**
* @brief extract data from scancode scanned result
*
* In licenses array:
* key-> license spdx key
* score-> score of a rule to matched with the output licenes
* name-> license full name
* text_url-> license text reference url
* matched_text-> text in code file matched for the license
* start_line-> matched text start line
*
* In copyright array:
* value-> copyright statement
* start-> start line of copyright statement
*
* In holder(copyright holder) array:
* value-> copyright holder name(author in FOSSology)
* start-> start line of copyright holder
*
* @param scancodeResult scanned result by scancode
* @param filename name of the file uploaded
* @return map having key as type of scanned and value as content for the type
*/
// HACK: Use try-catch block for exception handling
map<string, vector<Match>> extractDataFromScancodeResult(const string& scancodeResult, const string& filename) {
Json::Reader scanner;
Json::Value scancodevalue;
bool isSuccessful = scanner.parse(scancodeResult, scancodevalue);
map<string, vector<Match>> result;
vector<Match> licenses;
if (isSuccessful) {
Json::Value licensearrays = scancodevalue["licenses"];
for (unsigned int i = 0; i < licensearrays.size(); i++) {
Json::Value oneresult = licensearrays[i];
string licensename = oneresult["key"].asString();
int percentage = (int)oneresult["score"].asFloat();
string full_name=oneresult["name"].asString();
string text_url=oneresult["text_url"].asString();
string match_text = oneresult["matched_text"].asString();
unsigned long start_line=oneresult["start_line"].asUInt();
string temp_text= match_text.substr(0,match_text.find("\n"));
unsigned start_pointer = getFilePointer(filename, start_line, temp_text);
unsigned length = match_text.length();
result["scancode_license"].push_back(Match(licensename,percentage,full_name,text_url,start_pointer,length));
}
Json::Value copyarrays = scancodevalue["copyrights"];
for (unsigned int i = 0; i < copyarrays.size(); i++) {
Json::Value oneresult = copyarrays[i];
string copyrightname = oneresult["value"].asString();
unsigned long start_line=oneresult["start"].asUInt();
string temp_text= copyrightname.substr(0,copyrightname.find("[\n\t]"));
unsigned start_pointer = getFilePointer(filename, start_line, temp_text);
unsigned length = copyrightname.length();
string type="scancode_statement";
result["scancode_statement"].push_back(Match(copyrightname,type,start_pointer,length));
}
Json::Value holderarrays = scancodevalue["holders"];
for (unsigned int i = 0; i < holderarrays.size(); i++) {
Json::Value oneresult = holderarrays[i];
string holdername = oneresult["value"].asString();
unsigned long start_line=oneresult["start"].asUInt();
string temp_text= holdername.substr(0,holdername.find("\n"));
unsigned start_pointer = getFilePointer(filename, start_line, temp_text);
unsigned length = holdername.length();
string type="scancode_author";
result["scancode_author"].push_back(Match(holdername,type,start_pointer,length));
}
} else {
cerr << "JSON parsing failed " << scanner.getFormattedErrorMessages()
<< endl;
}
return result;
}
2021-08-05 18:45:42 scancode [0] :: JOB[13].scancode[999.localhost]: "parsing started"
2021-08-05 18:45:42 scancode [0] :: JOB[13].scancode[999.localhost]: "parsing check started"
2021-08-05 18:45:42 scancode [0] :: JOB[13].scancode[999.localhost]: "parsing success: cliOption = l ignoreFilesWithMimeType = 0"
2021-08-05 18:45:42 scancode [0] :: JOB[13].scancode[999.localhost]: "CHECK here scancode_copyright scancode_copyright_pk_seq"
2021-08-05 18:45:42 scancode [0] :: JOB[13].scancode[999.localhost]: "CHECK here scancode_author scancode_author_pk_seq"
2021-08-05 18:45:46 scancode [0] :: JOB[13].scancode[999.localhost]: "/home/fossy/.local/lib/python3.7/site-packages/cluecode/copyrights.py:3361: FutureWarning: Possible set difference at position 3"
2021-08-05 18:45:46 scancode [0] :: JOB[13].scancode[999.localhost]: " re.MULTILINE | re.UNICODE"
2021-08-05 18:45:46 scancode [0] :: JOB[13].scancode[999.localhost]: "/home/fossy/.local/lib/python3.7/site-packages/pygmars/lex.py:99: FutureWarning: Possible nested set at position 7"
2021-08-05 18:45:46 scancode [0] :: JOB[13].scancode[999.localhost]: " for m, label in matchers"
2021-08-05 18:45:46 scancode [0] :: JOB[13].scancode[999.localhost]: "Setup plugins..."
2021-08-05 18:45:46 scancode [0] :: JOB[13].scancode[999.localhost]: "/home/fossy/.local/lib/python3.7/site-packages/cluecode/copyrights.py:3361: FutureWarning: Possible set difference at position 3"
2021-08-05 18:45:46 scancode [0] :: JOB[13].scancode[999.localhost]: " re.MULTILINE | re.UNICODE"
2021-08-05 18:45:46 scancode [0] :: JOB[13].scancode[999.localhost]: "/home/fossy/.local/lib/python3.7/site-packages/pygmars/lex.py:99: FutureWarning: Possible nested set at position 7"
2021-08-05 18:45:46 scancode [0] :: JOB[13].scancode[999.localhost]: " for m, label in matchers"
2021-08-05 18:45:46 scancode [0] :: JOB[13].scancode[999.localhost]: "Setup plugins..."
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment