# PythonBasics.py
# Walk-through of list basics followed by pandas DataFrame basics.
# Run it as a script: it prompts once for a contact name, then prints each step.

# pandas is a third-party package (install it with `pip install pandas`);
# it is NOT part of the Python standard library. Once installed, it still has
# to be brought into the script with the import keyword.
import pandas as pd

# List of contacts
contacts = ['Philip', 'Mary', 'Beth', 'Joe', 'Frank']

# Ask a user to add a contact
new_contact = input('Enter a new contact: ')  # input() always returns a str

# Append the new contact to the contacts list
contacts.append(new_contact)

# Print the list
print("### Current List of Contacts ###")
print(contacts)  # original 5 plus 1 newly appended

# Loop through the list and output each contact
for contact in contacts:  # for x in ys: x is the loop variable, ys is the data set
    print(f"Contact Name: {contact}")

# page 49 - 53 pandas DataFrame basics
# 1-dimensional: Series, aka a column          > col (value)
# 2-dimensional: rows of values across columns > row / col (value)
# 3-dimensional: pivot off of a grouping       > pivot row / col (value)
# etc ... 7-dimensional ...

# Data sets
# col A: contacts ['Philip', 'Mary', 'Beth', 'Joe', 'Frank'] plus the appended name
# col B: list of ages, length 6, data type int
# NOTE: ages must have exactly as many entries as contacts (5 originals + 1
# appended); pd.DataFrame raises ValueError when column lengths differ.
ages = [18, 26, 21, 32, 48, 14]

# Define our structured data from our lists contacts, ages
data = {
    "Contact": contacts,
    "Age": ages,
    "State": ['FL', 'LA', '', 'FL', 'FL', 'TX'],
    "Zip Code": ['12345', '', '12345', '', '52145', ''],
}
# Show the raw dict before it becomes a DataFrame
print(data)

# Define a DataFrame using pandas and our data set "data"
df1 = pd.DataFrame(data)
print(df1)

# 2 ways to access data in a DataFrame, page 58-65
## method 1 > access data by integer position (iloc)
## method 2 > access data by label name (loc), if a label is available

# Access a specific row using an index
row_1 = df1.iloc[1]  # position 1 == the second row
print(row_1)
# df1 was built without an explicit index, so its row labels are 0..5 and
# label 1 is the same row as position 1.
row_1_label = df1.loc[1]
print(row_1_label)

# Access a Series of data, aka a "column", by label name / field name
series_age = df1['Age']
print(series_age)
mean_age = series_age.mean()  # average (aggregate)
min_age = series_age.min()  # minimum age
print(f"Average Age: {mean_age}")
print(f"Minimum Age: {min_age}")

# Access 2 or more columns, page 59
## Pass a list of column labels inside the indexing brackets, which is why the
## inline form has double brackets: df[['Name', 'Age']]
cols_data = ['Contact', 'State', 'Zip Code', 'Age']
# Selecting with a list of labels returns a DataFrame (not a Series), with the
# columns in the order listed.
series_name_state = df1[cols_data]
print(series_name_state)

# Use a for loop to loop through a Series selected by label name
series_contacts = df1['Contact']
for contact in series_contacts:
    print(f"Contact Name: {contact}")

# query() method, page 61
## relational operators > < ...
## logical operators and or not
## the query string is wrapped in single quotes: 'State == "FL"'
## IF the column name has a space in it, the column name MUST be wrapped in
## back ticks: '`First Name` == "Bob"'
state_query = series_name_state.query('State == "FL"')
print(state_query)  # printing a DataFrame called state_query
zip_query = state_query.query('`Zip Code` == "12345"')
print(zip_query)

# Page 63: display a column that is not the one used in the query condition
print("### Contact Names for Florida based on a Florida query ###")
display_contact_query_FL = series_name_state.query('State == "FL"')['Contact']
print(display_contact_query_FL)  # only the Contact column is displayed

# Additional DataFrame functions
## head()  e.g. head(3)
## tail()  e.g. tail(20)
## Handy when testing LARGE data sets: peek at the beginning and end of the set
## page 67 sort_values()
top_3_records = series_name_state.head(3)  # head() defaults to 5 rows
print(top_3_records)  # top 3
bottom_3_records = series_name_state.tail(3)
print(bottom_3_records)  # bottom 3
sort_byName = series_name_state.sort_values('Contact')
print(sort_byName)  # all 6 records sorted by name

# Chaining functions, level 1: call more than one function in a pipe
## Get the top 3 records with head(3), then sort them with sort_values('Contact')
top_3_sortBy_Name = series_name_state.head(3).sort_values('Contact')
print(top_3_sortBy_Name)

# page 71 New columns based on arithmetic operations
## Age in 10 years: the + 10 is applied to every value of the Age column
df1['AgeIn10Years'] = df1['Age'] + 10
print(df1)