Last active
July 1, 2021 05:38
-
-
Save sulaya86/3d52280df99f07d304462c6d257631e8 to your computer and use it in GitHub Desktop.
This Python script opens an SPSS file. SPSS is a software platform that offers advanced statistical analysis. In this exercise, given a dataset in an SPSS file (.sav), we need to find the mean, median, minimum, and maximum values for students and allstudents in the Teaching Ratings data. The purpose is to show basic familiarity with using Python to calculate …
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""This Python script opens an SPSS file. SPSS is a software platform that offers
advanced statistical analysis. In this exercise, given a dataset in an SPSS file (.sav),
we need to find the mean, median, minimum, and maximum values for students and allstudents
in the Teaching Ratings data. The purpose is to show basic familiarity with using Python
to calculate central tendency.
"""
# Author: Soraya Ruiz | |
# Creation Date: 2021-07-01 | |
# Import some required libraries | |
import pandas as pd | |
import os | |
# Absolute directory of this script: the script's own directory is joined onto
# the current working directory and then canonicalised with realpath.
__location__ = os.path.realpath(
    os.path.join(
        os.getcwd(),
        os.path.dirname(__file__),
    )
)

# The dataset is looked up in a "datasets" folder next to the script.
file_path = os.path.join(__location__, "datasets", "teachingratings.sav")

# Names of the columns that are loaded and summarised.
cols = ['students', 'allstudents']
def read_file(file, columns):
    """
    Load selected columns of an SPSS file through ``pd.read_spss``.

    :param file: location of the SPSS file; forwarded unchanged to ``pd.read_spss``
    :param columns: columns to load; forwarded as the ``usecols`` argument
    :return: the object produced by ``pd.read_spss`` for the requested columns
    """
    return pd.read_spss(file, usecols=columns)
if __name__ == '__main__':
    # First step: read only the requested columns from the SPSS file.
    df = read_file(file_path, cols)

    # Per-column counts as returned by DataFrame.count(). The result is
    # indexed by column label, so the entries are picked by position with
    # .iloc; a bare integer key (count()[0]) relies on a positional fallback
    # that pandas has deprecated for label-indexed Series.
    counts = df.count()
    total_students = counts.iloc[0]
    total_allstudents = counts.iloc[1]

    # Second step: calculate the summary statistics, one value per column.
    mean = df.mean()      # arithmetic mean: sum of the observations / their count
    median = df.median()  # middle value of the ordered observations
    minimum = df.min()
    maximum = df.max()

    # Third step: print each statistic under its heading, with a "---" line
    # between consecutive sections (none after the last one).
    report = (
        ("Mean", mean),
        ("Median", median),
        ("Minimum", minimum),
        ("Maximum", maximum),
    )
    for position, (title, values) in enumerate(report):
        if position:
            print("---")
        print(title)
        print(values.to_string(index=True, header=False))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment