# PythonBasics.py

# Starting contact names; one more is collected from the user below
contacts = [
    'Philip',
    'Mary',
    'Beth',
    'Joe',
    'Frank',
]

# Prompt for one additional contact name (input() returns a str)
new_contact = input('Enter a new contact: ')

# Put the new name at the end of the existing list (modifies it in place)
contacts.append(new_contact)

# Show the whole list at once: the 5 starting names plus the 1 just added
print("### Current List of Contacts ###")
print(contacts)

# Then show every name on its own line
for contact in contacts:  # contact = current item, contacts = the data set
    print(f"Contact Name: {contact}")


# page 49 - 53 pandas dataframe basics
# 1 Dimensional: Series aka column > col (value)
# 2 Dimensional: DataFrame, rows of values crossed with columns > row / col (value)
# 3 Dimensional: Pivot off of a grouping > pivot row / col (value)
# etc ... 7 Dimensional ...

# Data sets that become DataFrame columns
# col A: contacts -> the 5 starting names plus the 1 appended from input (6 total)
# col B: ages -> 6 int values, one per contact
ages = [
    18,
    26,
    21,
    32,
    48,
    14,
]

# pandas is a third-party package, NOT part of the Python standard library
# install it first (e.g. pip install pandas), then use the import keyword in your code
import pandas as pd

# Column-oriented data for the DataFrame: each key is a column label and
# each list holds that column's values (6 entries per column)
data = {
    "Contact": contacts,
    "Age": ages,
    # '' presumably stands for a State / Zip Code that was not provided
    "State": ['FL', 'LA', '', 'FL', 'FL', 'TX'],
    "Zip Code": ['12345', '', '12345', '', '52145', ''],  # zip codes kept as strings
}

# Show the plain dict before handing it to pandas
print(data)

# Build a DataFrame from the dict and show the resulting table
df1 = pd.DataFrame(data=data)
print(df1)

# 2 ways to access data in a DataFrame page 58-65
## method 1 > by integer position with .iloc
## method 2 > by index label with .loc, if a label is available

# Fetch the row at position 1 (counting starts at 0) both ways.
# df1 was built without an explicit index, so label 1 is that same row.
row_1 = df1.iloc[1]
row_1_label = df1.loc[1]
print(row_1)
print(row_1_label)

# Pull a single column (a Series) by its label name and aggregate it
series_age = df1['Age']
print(series_age)
mean_age, min_age = series_age.mean(), series_age.min()  # average and smallest age
print(f"Average Age: {mean_age}")
print(f"Minimum Age: {min_age}")

# Access 2 or more columns page 59
## Pass a list of column labels inside the indexing brackets; written
## inline this looks like double brackets: df1[['Contact', 'Age']]

cols_data = ['Contact', 'State', 'Zip Code', 'Age']
series_name_state = df1[cols_data]  # only these columns, in the listed order
print(series_name_state)

# Walk through one column, selected by label name, value by value
series_contacts = df1['Contact']
for contact in series_contacts:
    print(f"Contact Name: {contact}")

# query() method page 61
## relational operators > < ...
## logical operators and or not
## the whole query is a string wrapped in single quotes: 'State == "FL"'
## IF a column name has a space in it, the column MUST be wrapped in
## `back ticks` inside the query string: '`First Name` == "Bob"'

# Keep only the rows whose State is FL; the result is a DataFrame
state_query = series_name_state.query('State == "FL"')
print(state_query)

# Narrow the FL rows further to those with Zip Code 12345
zip_query = state_query.query('`Zip Code` == "12345"')
print(zip_query)

# Page 63 Call a column name that is not included in the query
print("### Contact Names for Florida based on a Florida query ###")
# state_query already holds the FL rows, so just take its Contact column
display_contact_query_FL = state_query['Contact']
print(display_contact_query_FL)  # only the Contact column is displayed

# Additional DataFrame functions
## head()   head(3)
## tail()   tail(20)
## Handy for checking the beginning and end of LARGE data sets
## page 67 sort_values()

top_3_records = series_name_state.head(n=3)  # n defaults to 5 when omitted
print(top_3_records)  # first 3 rows
bottom_3_records = series_name_state.tail(n=3)
print(bottom_3_records)  # last 3 rows
sort_byName = series_name_state.sort_values(by='Contact')
print(sort_byName)  # every row, ordered by Contact

# Chaining Functions Level 1 call more than one function in a pipe
## Take the first 3 rows with head(3), then order them with sort_values('Contact')
top_3_sortBy_Name = (
    series_name_state
    .head(n=3)
    .sort_values(by='Contact')
)
print(top_3_sortBy_Name)

# page 71 New Columns based on arithmetic operations
## Age in 10 Years

# Add 10 to every value in the Age column and store the result on df1
# as a new column named AgeIn10Years
df1['AgeIn10Years'] = df1['Age'].add(10)
print(df1)