Blackhawk95 · July 11, 2018 12:23
diff --git a/Internship_report.tex b/Internship_report.tex
 \documentclass[12pt,a4paper]{report}
 \usepackage[utf8]{inputenc}
 \usepackage{amsmath}
 \usepackage{amsfonts}
 \usepackage{amssymb}
 \usepackage{graphicx}
 \usepackage{booktabs}
 \usepackage{algorithm}
 \usepackage{algpseudocode}
 \usepackage{subcaption}
 \usepackage[english]{babel}
 \usepackage[export]{adjustbox}
 \usepackage{enumerate}
 \usepackage[left=3.8cm,right=2.5cm,top=2.5cm,bottom=2.5cm]{geometry}
 \usepackage{lineno}
 \usepackage{cite}
 \usepackage{acronym}
 \usepackage{titletoc}
 \usepackage{blindtext}
 \renewcommand{\baselinestretch}{1.5}
 \usepackage{xpatch}



 % At \begin{document}, write a bold heading row into the .toc, .lof and .lot
 % files so that each list opens with column titles above its entries.
 \AtBeginDocument{%
  \addtocontents{toc}{\textbf{Title} ~\hfill \textbf{Page}\par}
  \addtocontents{lof}{\textbf{Figure No.} ~\hfill \textbf{Title} ~\hfill \textbf{Page}\par}
  \addtocontents{lot}{\textbf{Table No} ~\hfill \textbf{Title} ~\hfill \textbf{Page}\par}
 }

 % Centre chapter headings by patching the two chapter-head macros.
 % \xpatchcmd{<macro>}{<search>}{<replace>}{<on success>}{<on failure>}:
 % each call inserts \centering into the matched font-setup tokens and logs
 % the outcome with \typeout. \makeatletter is needed because the macro
 % names contain '@'.
 \makeatletter
 % Head macro patched at the \Huge\bfseries title line (unnumbered form).
 \xpatchcmd{\@makeschapterhead}{%
  \Huge \bfseries  #1\par\nobreak%
 }{%
  \Huge \bfseries\centering #1\par\nobreak%
 }{\typeout{Patched makeschapterhead}}{\typeout{patching of @makeschapterhead failed}}

 % Head macro patched at the "\@chapapp \thechapter" line (numbered form).
 \xpatchcmd{\@makechapterhead}{%
  \huge\bfseries \@chapapp\space \thechapter
 }{%
  \huge\bfseries\centering \@chapapp\space \thechapter
 }{\typeout{Patched @makechapterhead}}{\typeout{Patching of @makechapterhead failed}}
 \makeatother

 \begin{document}

 	\begin{center}
 			\bf{MALWARE DETECTION IN ANDROID USING MACHINE LEARNING ON CHIP\\}
 		\vspace*{35pt}
 		\textbf{
 			\it{submitted in partial fulfillment of the requirements \\for the completion of second semester}\\}
 		\vspace{40pt}
 		\textbf{Master of Technology\\}
 		in\\
 		\textbf{Computer Science and Engineering}\\
 		\vspace{10pt}
 		\textbf{by\\
 			\vspace{20pt}
 			ABHIJITH M\\
 			\vspace{8pt}
 			(206117024)}\\
 		\vspace{10pt}
 		
 		\vspace{60pt}
 		\includegraphics[width=0.3\textwidth]{./logo.png} \\
 		\vspace{30pt}
 		\textbf{DEPARTMENT OF COMPUTER SCIENCE AND ENGINEERING\\
 			NATIONAL INSTITUTE OF TECHNOLOGY\\
 			TIRUCHIRAPPALLI – 620015,\\
 			July 2018
 		}
 	\end{center}
 \pagenumbering{gobble}
 \clearpage
 % \RN{<number>}: typeset <number> as an upright, upper-case Roman numeral.
 % \expandafter makes \romannumeral expand before \uppercase is applied.
 % NOTE(review): no use of \RN appears in the visible part of this file.
 \newcommand{\RN}[1]{%
 	\textup{\uppercase\expandafter{\romannumeral#1}}%
 }
 \pagenumbering{roman}

 \begin{center}
 	\section*{BONAFIDE CERTIFICATE}
 \end{center}
 \vspace{20pt}
 \addcontentsline{toc}{section}{\textbf{BONAFIDE CERTIFICATE}}
 This is to certify that the internship work titled \textbf{ MALWARE DETECTION IN ANDROID USING MACHINE LEARNING ON CHIP} is a bonafide record of the work done by 
 \begin{center}
    \textbf{ABHIJITH M (206117024)}
 \end{center}
 under my supervision and guidance, in partial fulfillment of the requirements for the completion of second semester of \textbf{Master of Technology in Computer Science and Engineering} at \textbf{NATIONAL INSTITUTE OF TECHNOLOGY, TIRUCHIRAPPALLI}, during the academic year 2017-2018.
 \vspace{40pt}
 \begin{flushright}
 	\textbf{Prof. N. Ramasubramanian \\}
 	Internship Guide
 \end{flushright}

 \clearpage
 \begin{center}
    \section*{ABSTRACT}
 \end{center}
 \addcontentsline{toc}{section}{\textbf{ABSTRACT}}
 \vspace{10pt}
 The widespread use and general purpose computing capabilities of smartphones make them the next big targets of malicious software (malware) and security attacks. Malware detection software for desktop environments performs signature based identification, heuristic analysis of instructions or even analysis of sandboxed applications to detect malware, all of which are computationally intensive. There also exist cloud services which perform this computationally intensive workload in the cloud itself and provide users with just the end result. But given the battery, computing power, and bandwidth limitations inherent to mobile devices, malware detection on these devices based on software or the cloud is not a very attractive approach. In our proposed method to detect malware, we are planning to incorporate the feed forward section of a neural network on an FPGA, after the network has been trained on a powerful desktop computer. This proof of concept work is done in such a way that the obtained IP can later be embedded in a mobile SOC as a dedicated Malware Processing Unit. The hardware simulation showed an accuracy of around 89.59\% using a simple ANN with one hidden layer on Digilent's Zybo evaluation board with a Xilinx Zynq-7000 family APSOC.\\
 \textbf{Keywords}
 : Machine Learning (ML), Artificial Neural Network (ANN), Android Malware, FPGA
 \clearpage
 \begin{center}
 	\section*{ACKNOWLEDGEMENT}
 \end{center}\addcontentsline{toc}{section}{\textbf{ACKNOWLEDGEMENT}}
 \vspace{20pt}
 First of all, I thank God Almighty for giving me all the blessings to this venture and for helping me in finishing the internship successfully.
 \par
 This internship could not have been successfully completed without the help and cooperation received from some distinguished people.
 \par
 I express my sincere thanks and gratitude to \textbf{Prof. N. Ramasubramanian}, Professor, Department of Computer Science and Engineering, National Institute of Technology, Tiruchirappalli and \textbf{Ms. B. Shameedha Begum}, Assistant Professor, Department of Computer Science and Engineering, National Institute of Technology, Tiruchirappalli who guided me with valuable inputs from time to time during my project work and enabled me to complete my work successfully.
 \par
 I am grateful to \textbf{Dr. R. Leela Velusamy}, Associate Professor and Head of the Department, Department of Computer Science and Engineering, National Institute of Technology, Tiruchirappalli for the facilities she has provided in the department without which the internship could not have been completed.
 \par
 I also wish to extend my gratitude to \textbf{Dr. M. Sridevi}, Assistant Professor, Department of Computer Science and Engineering for giving valuable suggestions and opinions during the internship period.
 \par
 I am also thankful to the faculty and staff members of Department of Computer Science and Engineering, my friends for their support and help. This project work has been carried out by using the facilities of \textbf{RISE} lab.
 \vspace{20pt}
 \begin{flushright}
 	ABHIJITH M \\(206117024)
 \end{flushright}
 \clearpage
 \renewcommand{\contentsname}{\centering TABLE OF CONTENTS}
 \tableofcontents

 \listoftables
 \addcontentsline{toc}{section}{List of Tables}

 \listoffigures
 \addcontentsline{toc}{section}{List of Figures}

 \chapter{INTRODUCTION}

 \pagenumbering{arabic}
 \setcounter{page}{1}
 Malware threats are present in almost all software architectures, from device firmware to supercomputers. Smart devices are no exception. Android is now the main target for these attacks, and its markets have become a sensitive bridge where attackers aim to introduce their malicious apps to get control over users' mobiles.
 \par
 The reduction in the cost of silicon and die area brought about by advancements in modern VLSI technology has led to the idea of using dedicated hardware alongside general purpose processors to enhance specific workloads. Such dedicated hardware ranges from video encoders to the neural processing units incorporated in modern smartphones from the likes of Google, Huawei etc. 
 \par
 Designing such dedicated hardware is now much easier, as FPGAs have become cheaper and widely available. FPGA manufacturers are also making it easy to develop IPs by supporting high level languages such as C and C++ for synthesis. This enables software engineers to develop IPs quickly and to perform fast iterations to meet the standards they are aiming for. 
 \par
 Machine learning algorithms are becoming widely used for all sorts of classification problems. Deploying ML algorithms for malware detection is a widely studied topic in research communities because of their ability to detect zero-day attacks by adapting quickly to newer attack vectors which are similar to those used by older malware. Their good ``learning property'' enables them to always stay relevant.
 \section{Android Malwares}
 Malware is any software intentionally designed to cause damage to a computer or computational device. Malware does so after gaining access to the device in the form of executables, scripts and active content. In Android OS, devices mostly get infected by malware through the installation of malicious apps or Application Packages (APKs). Android OS provides a way to install applications through the Google-owned Play Store, which has a cloud based malware detection solution called Bouncer. But a large number of applications are installed from outside sources. These sources may contain malicious APKs, which can infect devices.
 \par
 Malware is usually detected and removed by running anti-malware software on devices. Such software detects malware through mechanisms such as signature based detection, heuristic based mechanisms, behavioural pattern detection and cloud based detection.
 \par
 In signature based detection, the software tries to match static fingerprints of malicious code with fingerprints of files and programs. 
 Heuristic based mechanisms try to look for malicious instructions inside applications based on known malware.
 Behavioural detection tries to detect malicious activities such as attempting to read protected data or adjusting system settings without prior user permission.
 Cloud based detection runs the app in a simulated sandboxed environment in the cloud and analyses its activity, which is an effective method to identify malicious applications.
 \par
 Static methods of detection are easier to perform but are less effective against modern malware, as most modern malware tries to obtain its malicious code at run-time; this can be countered by dynamic methods such as behavioural or cloud based mechanisms. But these mechanisms require a powerful system to perform real-time analysis of malware, and such analysis is also dangerous. Cloud based mechanisms also require a good network connection. 
 \par
 \begin{figure}
    \centering
    \includegraphics[scale=0.8]{apk.jpg}
    \caption{Android APK structure}
    \label{fig:apk}
 \end{figure}
 An Android APK is an archive file containing the Dalvik executable required to run the application and an Android manifest file containing the metadata of the application stored in encoded XML format. Since an application's metadata is present in a separate file, ``AndroidManifest.xml'' can be considered a unique fingerprint of an app. Android apps use permissions to access different services provided by the OS which they are otherwise not allowed to use. These permissions must be granted by the user during installation of the app or at runtime. An example of such a permission is ``READ\_CALL\_LOG'', which allows an app to view the call logs of a phone, and it is used in applications such as the Contacts app. Malicious applications trick users into granting these permissions so that they can extract personal details from devices, show intrusive ads, use the device for DDoS attacks or even steal bank account details stored on the device. This permission pattern can be used as a signature for detecting malware.
 \section{Machine Learning}
 Machine learning (ML) is a subset of artificial intelligence in the field of computer science that often uses statistical techniques to give computers the ability to ``learn'' from data, without being explicitly programmed. 
 There are three kinds of learning mechanisms: supervised learning, unsupervised learning and reinforcement learning.
 In supervised learning, computer is presented with example inputs and their desired outputs, and the goal of computer is to learn a general rule that maps inputs to outputs.
 In unsupervised learning, labels are not given to the learning algorithm, leaving it on its own to find structure in its input. Unsupervised learning can be a goal in itself (discovering hidden patterns in data) or a means towards an end (feature learning).
 In Reinforcement learning, the machine is concerned with how software agents ought to take actions in an environment so as to maximize some notion of cumulative reward.
 \par 
 It is used to solve problems of different types. Some of them are classification, regression, clustering etc.
 In classification, inputs are divided into two or more classes, and the learner must produce a model that assigns unseen inputs to one or more of these classes. This is typically tackled in a supervised manner.
 Regression is a supervised approach with the outputs being continuous rather than discrete.
 In clustering, a set of inputs is to be divided into groups. The groups are not known beforehand which makes it an unsupervised task.
 \par
 There are different kinds of approaches to perform machine learning; some of them are decision trees, support vector machines, Bayesian networks and artificial neural networks.
 \par
 An artificial neural network is a learning algorithm that is inspired by biological neural networks. Computations are structured in terms of an interconnected group of artificial neurons, processing information using a connectionist approach to computation. They are usually used to model complex relationships between inputs and outputs, to find patterns in data, or to capture the statistical structure in an unknown joint probability distribution between observed variables.
 \section{FPGA and AP SOC}
 A field-programmable gate array (FPGA) is an integrated circuit designed to be configured by a designer after manufacturing, hence called ``field-programmable''. The FPGA configuration is generally specified using a hardware description language (HDL). FPGAs contain an array of programmable logic blocks, and a hierarchy of reconfigurable interconnects that allow the blocks to be ``wired together'', like many logic gates that can be inter-wired in different configurations. Logic blocks can be configured to perform complex combinational functions, or merely simple logic gates like AND and XOR. In most FPGAs, logic blocks also include memory elements, which may be simple flip-flops or more complete blocks of memory.
 \par
 Certain applications of FPGAs require running companion software, which reduces the portability of such devices. Xilinx, a leading manufacturer of FPGAs, came up with a class of devices called ``All Programmable System on Chip'' to mitigate this problem. An All Programmable System on Chip (AP SOC) is an integrated circuit which comprises programmable logic, a processing subsystem and an interface connecting both. The programmable logic (PL) comprises FPGA logic fabric, and the processing subsystem (PS) comprises general purpose computing hardware consisting of processing cores, memory and supporting controllers. This allows the required software to run on the PS, which interacts with the PL over an interface. Zynq, provided by Xilinx, is an example of such an AP SOC which marries both PL and PS in one package.
 \par
 \begin{figure}
    \centering
    \includegraphics[scale=0.5]{Zynq.jpg}
    \caption{Zynq 7000 series AP SOC}
    \label{fig:zynq}
 \end{figure}
 \par
 Vivado Design Suite is a software suite produced by Xilinx for synthesis and analysis of HDL designs, features for system on a chip development and high-level synthesis. Vivado enables developers to synthesize their designs, perform timing analysis, examine RTL diagrams, simulate a design's reaction to different stimuli, and configure the target device.
 Vivado Design Suite consists of a high level synthesis tool, which enables C, C++ and SystemC programs to be directly targeted to Xilinx devices without the need to manually create RTL; the Vivado simulator, a compiled-language simulator that supports mixed-language designs, TCL scripts, encrypted IP and enhanced verification; and an IP integrator. It works with the Xilinx Software Development Kit (SDK), which enables users to write code that runs on the processing subsystem, with live in-system instrumentation and performance visualization to quickly find system performance bottlenecks.
 \chapter{LITERATURE REVIEW}
 Several researchers developed different strategies to do malware detection based on static analysis. Zarni Aung et al. (2013) came up with a method to do permission based static analysis of APKs. In their method, they used a one-hot vector to represent android permissions, presence of a permission indicated by one and its absence a zero. They used K-means clustering and decision tree for classification.
 \par
 Daniel Arp et al. (2014) performed a broad static analysis and created a huge dataset of benignwares and malwares. They were able to outperform several similar methods by coming up with higher accuracy and lower false alarms in static analysis. They used Support Vector machine as their classifier.
 \par
 Jin Li et al. (2018) achieved high detection accuracy with minimal computation by reducing the number of permissions required: they identified the permissions used by the largest number of malware samples and removed those permissions which have a high probability of co-existence. They reduced the required significant permissions to 22. By using a support vector machine, they achieved an accuracy of around 94\%.
 \par
 Javier Duarte et al. (2018) are working on a way to process data from a particle collider at high speed. For that they need fast-inference neural networks which are easy to deploy. They are working on a compiler package called hls4ml which will allow them to do so. By studying their methodology, we were able to apply their solution on a small scale for our purpose.  
 \chapter{PROPOSED SOLUTION}
 Since malware signatures can be used to identify malicious apps and the Android permissions in the manifest file can be used as an app's fingerprint, malware can be detected using permissions. An artificial neural network is proposed as the classifier because its inference part, i.e., the feed forward network, can easily be converted to an IP block using a high level synthesis language, which can then be synthesized onto the FPGA board.
 \section{Neural Network Model}
 A simple 3 layer neural network consisting of an input layer, a hidden layer and an output layer is created, with a fully connected feed forward network using the sigmoid activation function and a back propagation network which performs gradient descent to modify the weights in the feed forward network. The input layer takes in 21 inputs as a one-hot vector, with 1 for each permission present in the app and 0 for each absent one. The output obtained is a vector of size 2, which indicates whether the permission set given as input corresponds to benignware or malware. The obtained result is compared with the actual data and the mean square error is propagated back into the network. The weights thus generated are exported as a valid weight matrix which can be imported as a header file into the C program which will be used at a later stage.
 \section{Inference model and IP generation}
 The weights obtained from the neural network model are used to create the inference network. The inference network works exactly like the feed forward network in the ANN. It takes in a permission vector as input and outputs whether that permission vector corresponds to malware or not. This inference model is used to generate the IP block. Vivado HLS allows this model, written in C++, to be converted to an IP block. One of the major optimizations available was loop flattening in hardware, which reduced the number of clock cycles required to produce the result. This flattening/pipelining is obtained through directives which enable Vivado HLS to optimize designs. These directives can also be used to declare the input and output ports of the IP and the type of interface the IP supports.
 \section{Block Generation, Synthesis and Application}
 The generated IP is imported into the Vivado Design tool, where a block design is created using this IP. The block design consists of a BRAM and a BRAM controller for providing data to the IP and obtaining the result from the IP. It also contains the central processing system (ZYNQ PS), and an AXI interconnection controller is used to connect the devices.

 This design is implemented in HDL and synthesized to generate a bitstream file. This bitstream file along with other information about the generated design is exported into Xilinx SDK. Xilinx SDK allows users to generate applications that will run on the custom hardware designs. The system can be programmed using C,C++ or SystemC and it will run on the PS of the AP SOC. The application talks to PL via interface and it enables users to utilize the custom logic. 
 \par
 The board talks to the computer via the JTAG protocol, which also makes serial communication possible, to send and retrieve data as well as to program the FPGA. Once the PL part is programmed over the USB JTAG protocol using the bitstream file, the application is uploaded into the PS part of the device. The output can be seen through a serial monitor.
 \par
 \begin{figure}
    \centering
    \includegraphics[scale=0.43]{model_circuit.png}
    \caption{Block design of custom IP connected to PL system}
    \label{fig:my_label}
 \end{figure}
 \par
 \chapter{RESULTS}
 Accuracy of network was determined separately as a software model itself. Performance evaluation was also done at different targets and output compared.
 \section{Experimental Setup}
 \subsubsection{Android app permission dataset} Alejandro Martín et al. came up with a tool for malware detection called ADROIT. They open sourced the dataset they created for this purpose. This dataset consists of 8057 benignware samples and 3417 malware samples.
 \subsubsection{Significant Permissions} Jin Li et al. identified 22 significant permissions, of which 21 were available in the dataset; these were used for classification.
 \subsubsection{Neural Network}
 The ANN chosen is a simple NN which consists of 1 hidden layer, being the smallest neural network possible here. It is chosen as a balance between die area, performance and accuracy. In our initial testing, 2 hidden layers did not provide an accuracy gain high enough to compensate for the extra die area. Also, adding one more layer demands a huge increase in look up tables (LUTs), DSP48E slices and other units, which our chosen board might not be able to provide given the resource consumption of our simple model.
 \subsubsection{Hardware and Software} The hardware used is a Xilinx Zynq-7000 (XC7Z010-1CLG400C) found on the Zybo evaluation board. It consists of 28,000 logic cells, 240 KB Block RAM, 80 DSP slices and a 650 MHz dual-core Cortex™-A9 processor.
 Vivado Design Suite 2016.2 is used to develop, synthesize the IP and to do performance evaluation.
 \section{Calculation}
 70\% of the available dataset is used for training and 30\% is used for testing and evaluation. Accuracy is calculated as the ratio of the number of correct predictions to the total number of samples used for testing. With 51 nodes in the hidden layer, a learning rate of 0.0002 and 10000 iterations, an accuracy of 89.59\% is obtained.
 \par
 Performance evaluation is performed by considering the time taken for inference on different platforms. Even with a lower clock speed of 100 MHz, the custom IP showed about double the performance of an ARM A9 core running at 650 MHz, taking about 50 $\mu$s to detect whether a given permission vector is malware or not (Table~\ref{tab:my_label}). Figure~\ref{fig:power} shows the simulated energy consumption of each part of the design, which also indicates that our IP is better than a general purpose processor based solution.

 \begin{table}[h]
    \centering
    \begin{tabular}{| c | c | c |} \toprule
        \textbf{Device} & \textbf{Execution time ($\mu$s)} & \textbf{Clock}\\ \midrule
        Intel® Core™ i7-4770 & 27.379 & 2.280 GHz \\ \midrule
        ARM Cortex™-A9 & 100.61 & 650 MHz \\ \midrule
        Custom IP & 53.79 & 100 MHz\\ \bottomrule
    \end{tabular}
    \caption{ Execution time of the model on different devices and their clock speed}
    \label{tab:my_label}
 \end{table}
 \begin{figure}
    
    \includegraphics[scale=0.6]{utili.png}
    \caption{Resource utilization of IP and helper components shown by Vivado}
    \label{fig:util}
 \end{figure}
 \begin{figure}
    \centering
    \includegraphics[scale=0.5]{power.png}
    \caption{Simulated power usage chart shown by Vivado}
    \label{fig:power}
 \end{figure}
 A helper application is written in Python, which takes in an Android application's location as a command line argument, extracts the permissions from the APK and passes them as a one-hot vector to the Zybo board via serial communication at a baud rate of 115200 through a ttyUSB port. The Zybo board with our custom IP sends back the output through the serial port, which our Python program reads and displays to the user. The output also contains the result of the sigmoid function in the last stage, which determines whether the app is malware or not ($>$ 0.5 indicates malware, otherwise benign).
 \begin{figure}
 \centering
    \includegraphics[scale=0.45]{result.png}
    \caption{Result obtained by executing the python program}
    \label{fig:result}
 \end{figure}
 \section{Result Analysis}
 The accuracy obtained in testing seems good for an initial analysis of such a model. The model can be further improved by increasing the number of layers as well as by varying the different hyperparameters. Increasing the number of layers increases the number of weight values, which in turn increases the number of LUTs. This can be compensated for by pruning the resulting model, which might reduce the die area so that the design possibly fits on larger FPGA/APSOC boards.
 \par
 The throughput of the IP can further be improved by performing a pipelining of input and also performing a second layer of loop unrolling, which may increase the die area and power consumption. IP clock speed can also be further increased to improve the throughput. 
 \chapter*{CONCLUSION}
 As computing is going mobile, malware attacks on mobile devices will rise. Both static and dynamic analysis methods are required to strike a balance between proper detection and computational cost. Static methods using machine learning mechanisms are achieving higher prediction accuracy and reducing the false error rate.
 \par
 Our method is a proof of concept to apply machine learning algorithm on an FPGA and to use it to solve the problem regarding malware in android. 
 Building dedicated malware detection accelerators can reduce the computational load on our mobile processors. If app's activities are also used as parameters for detecting malware using a similarly trained model on dedicated silicon, on-device dynamic runtime detection might also be possible. 
 \par
 Modern flagship smartphones come with dedicated neural processors, which are general purpose neural processors and can be used in any AI related task. Currently these ``co-processors'' are not widely used, but in future, with support from Google's TensorFlow ML library, it may get easier for developers to tap into these AI resources.
 \renewcommand\bibname{REFERENCES}
 \bibliographystyle{IEEEtran}
 \addcontentsline{toc}{chapter}{REFERENCES}
 \begin{thebibliography}{00}
 	\bibitem{b1} Sun, Lichao, Zhiqiang Li, Qiben Yan, Witawas Srisa-an, and Yu Pan. "SigPID: significant permission identification for android malware detection." In Malicious and Unwanted Software (MALWARE), 2016 11th International Conference on, pp. 1-8. IEEE, 2016.
 	\bibitem{b2} Duarte, J., Han, S., Harris, P., Jindariani, S., Kreinar, E., Kreis, B., Ngadiuba, J., Pierini, M., Tran, N. and Wu, Z., 2018. Fast inference of deep neural networks in FPGAs for particle physics. arXiv preprint arXiv:1804.06913.
 	\bibitem{b3} Arp, D., Spreitzenbarth, M., Hubner, M., Gascon, H., Rieck, K. and Siemens, C.E.R.T., 2014, February. DREBIN: Effective and Explainable Detection of Android Malware in Your Pocket. In Ndss (Vol. 14, pp. 23-26).
 	\bibitem{b4} Aung, Z. and Zaw, W., 2013. Permission-based android malware detection. International Journal of Scientific \& Technology Research, 2(3), pp.228-234.
 	\bibitem{b5} Martín, A., Calleja, A., Menéndez, H. D., Tapiador, J., \& Camacho, D. (2016, December). ADROIT: Android malware detection using meta-information. In Computational Intelligence (SSCI), 2016 IEEE Symposium Series on (pp. 1-8). IEEE.
 \end{thebibliography}
 \clearpage
 \chapter*{APPENDIX}
 \section*{Source code}
 \subsection*{malware\_or\_not\_hardware.py}
 \begin{verbatim}
 import sys
 from androguard.core.bytecodes import apk
 from androguard.core.bytecodes import dvm
 import pandas as pd
 import serial
 import time

 micro_dataset_perm_list = ["android.permission.ACCESS_WIFI_STATE",
 "android.permission.READ_LOGS", "android.permission.CAMERA",
 "android.permission.READ_PHONE_STATE", 
 "android.permission.CHANGE_NETWORK_STATE",
 "android.permission.READ_SMS", "android.permission.CHANGE_WIFI_STATE",
 "android.permission.RECEIVE_BOOT_COMPLETED", 
 "android.permission.DISABLE_KEYGUARD",
 "android.permission.RESTART_PACKAGES", "android.permission.GET_TASKS",
 "android.permission.SEND_SMS", "android.permission.INSTALL_PACKAGES",
 "android.permission.SET_WALLPAPER", "android.permission.READ_CALL_LOG",
 "android.permission.READ_CONTACTS",
 "android.permission.WRITE_APN_SETTINGS",
 "android.permission.READ_EXTERNAL_STORAGE",
 "android.permission.WRITE_CONTACTS",
 "com.android.browser.permission.READ_HISTORY_BOOKMARKS",
 "android.permission.WRITE_SETTINGS"]

 def getPermissions(filename):
    '''
    Build the one-hot permission vector for an APK.

    input: filename -- sequence whose first element is the APK path
    output: list with one 0/1 entry per name in micro_dataset_perm_list,
            in the same order (1 = the APK requests that permission)
    raises: ValueError when no APK path is supplied
    '''
    # An empty argument list used to raise IndexError and a None path fell
    # through to an unbound local; report both as one explicit error.
    if not filename or filename[0] is None:
        raise ValueError("no APK path given")

    app = dvm.APK(filename[0])
    per = app.get_permissions()  # androguard func to get perms

    # The vector length follows the permission table (taken from the header
    # of the dataset) instead of a hardcoded 21.
    return [1 if name in per else 0 for name in micro_dataset_perm_list]

 def serial_communicator(): #serialtest
    '''
    Send the permission vector of the APK named on the command line to the
    board over /dev/ttyUSB1 and print the verdict it answers with.

    The APK path is read from sys.argv[2:]. The vector is sent as
    space-separated 0/1 values terminated by a newline. The reply is split
    on whitespace; fields 1 and 2 are read as the benign and malware scores.
    '''
    file = sys.argv[2:]
    perm = getPermissions(file)
    payload = (' '.join(str(p) for p in perm) + '\n').encode('utf-8')

    line = b""
    # The context manager closes the port on every exit path.
    with serial.Serial('/dev/ttyUSB1', 115200, timeout=3) as ser:
        ser.write(payload)

        # polling -- the only cause of delay
        # Resend and re-read until a reply of at least 30 bytes arrives.
        while(len(line) < 30):
            time.sleep(0.1)
            ser.write(payload)
            try:
                line = ser.read(40)
            except serial.SerialException:
                # Only port errors are retried; Ctrl-C still interrupts.
                line = b""

    out = line.split()
    benign = out[1].decode('utf-8')
    mal = out[2].decode('utf-8')

    if(float(mal) < float(benign)):
        print("benign " + benign)
    else:
        print("malware "+ mal)

 # Run the hardware demo only when "serialtest" is given as the first
 # argument; with no arguments the script now exits quietly instead of
 # failing with IndexError.
 if(len(sys.argv) > 1 and sys.argv[1] == "serialtest"):
    serial_communicator()    #for showing demo
 \end{verbatim}
 \clearpage
 \subsection*{neural\_network\_ip.cpp}
 \begin{verbatim}
 #include <hls_math.h>
 #include <stdio.h>
 //for micro_dataset
 // '#' does not start a comment in C/C++; use '//' so no stray tokens
 // follow the #include directives.
 #include "uwin.h"   // .h file with input weights
 #include "uwout.h"  // .h file with output weights
 #define I 21  // size of the input vector (inp[I])
 #define H 51  // size of the hidden layer (c[1][H])
 #define O 2   // size of the output vector (res[O])

 // Logistic activation: 1 / (1 + e^(-x)), using the HLS math library's expf.
 float sigmoid(float x){
    const float denominator = 1 + hls::expf(-x);
    return 1 / denominator;
 }

 // Top-level function of the IP: forward pass of the two-layer network.
 //   inp : I input values
 //   res : O output values, written by the second layer
 // Hidden activations are sigmoid(sum_k inp[k] * wi[k][j]); outputs are
 // sigmoid(sum_k hidden[k] * wo[k][j]). wi and wo are not defined in this
 // block -- presumably they come from uwin.h / uwout.h; confirm there.
 void mlcore(float inp[I],float res[O]){
 // Interface directives: block-level control (port=return) is exposed as
 // s_axilite in bundle CRTL_BUS; res and inp are bram ports.
 // NOTE(review): "CRTL_BUS" looks like a typo of CTRL, but anything generated
 // from this name would have to change with it -- leave as is unless verified.
 #pragma HLS INTERFACE s_axilite port=return bundle=CRTL_BUS
 #pragma HLS INTERFACE bram port=res
 #pragma HLS INTERFACE bram port=inp
    int i,j,k;  // i is declared but not used below
    float c[1][H];       // hidden-layer activations
    float output[1][O];  // output-layer activations before the copy to res
    //first layer
    for(j = 0;j< H;j++){
        c[0][j] = 0;
        for(k = 0;k<I;k++){
            // pipelining directive applied to the accumulation loop
            #pragma HLS PIPELINE
            c[0][j] += inp/*ut[0]*/[k] * wi[k][j];}
        c[0][j] = sigmoid(c[0][j]);
    }
 	//second layer
    for(j = 0;j< O;j++){
        output[0][j] = 0;
        for(k = 0;k<H;k++){
            // pipelining directive applied to the accumulation loop
            #pragma HLS PIPELINE
            output[0][j] += c[0][k] * wo[k][j];}
        output[0][j] = sigmoid(output[0][j]);
        res[j] = output[0][j];
    }
 }
 \end{verbatim}
 \subsection*{malware\_ip\_middleware\_application.cc}
 \begin{verbatim}
 #include <stdio.h>
 #include <xmlcore.h>
 #include <xil_printf.h>
 #include <xparameters.h>
 #include "xuartps.h"
 #include <math.h>

 #define UART_DEVICE_ID XPAR_PS7_UART_1_DEVICE_ID

 float *inpHW = (float *) 0x42000000;
 float *resHW = (float *) 0x40000000;

 XMlcore doml;
 XMlcore_Config *doml_cfg;

 /*
  * Return the four bytes of `val` assembled into an unsigned int, with
  * byte 0 of the object as the least significant byte (on a little-endian
  * target this is the float's raw bit pattern).
  *
  * Each byte is widened to unsigned int before shifting: shifting the
  * int-promoted byte left by 24 is undefined once the byte is >= 0x80,
  * which happens for every negative float.
  */
 unsigned int float_to_u32(float val){
    const unsigned char *bytes = (const unsigned char *)&val;
    return ((unsigned int)bytes[3] << 24) | ((unsigned int)bytes[2] << 16)
        | ((unsigned int)bytes[1] << 8) | (unsigned int)bytes[0];
 }


 /*
  * Look up the configuration of the mlcore IP and initialise the global
  * driver instance `doml` with it. Both failure cases are reported on
  * stdout; a failed lookup used to be skipped silently.
  */
 void init_mlCore(){
    doml_cfg = XMlcore_LookupConfig(XPAR_MLCORE_0_DEVICE_ID);
    if(!doml_cfg){
        printf("failed to look up mlcore config\n");
        return;
    }

    if(XMlcore_CfgInitialize(&doml,doml_cfg) != XST_SUCCESS)
        printf("failed to initialize\n");
 }

 void menu(){
    float z[1][I] = {1,0,0,1,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1};
    float a[I];
    for(int k = 0; k<I;k++)
        scanf("%f",&a[k]);

    for(int k = 0; k<I;k++)
        z[0][k]= a[k];


    for(int i = 0;i <I;i++)
        inpHW[i] = float_to_u32(z[0][i]);

    //hardware
    XMlcore_Start(&doml);
    //while(!XMlcore_IsReady(&doml));   
    while(!XMlcore_IsDone(&doml));
    while(!XMlcore_IsIdle(&doml));
    printf("result: %f %f \n\n\n\r",resHW[0],resHW[1]);
    } // menu()

 /*
  * Entry point: initialise the IP driver, write an initial vector to the
  * IP's input memory, then serve requests through menu() forever.
  */
 int main()
 {
    init_mlCore();
    float resa[O];  // not used below
    // NOTE(review): `input` is not declared anywhere in this listing --
    // presumably a sample vector from a header; confirm.
    // NOTE(review): inpHW is a float pointer, so the unsigned value returned
    // by float_to_u32() is value-converted on assignment, not stored as raw
    // bits -- confirm this is intended.
    for(int i = 0;i <I;i++)
        inpHW[i] = float_to_u32(input[0][i]);

    // menu() is called in an endless loop, so the return below is never reached.
    while(true)
        menu();

 return 0;
 }



 \end{verbatim}
 \end{document}