In: Computer Science
The table below contains the average daily temperature (C) in two cities, NYC and Philly, recorded during the month of July. Transform the dataset from the wide format to the long format; i.e., the new table should have three columns: day, city, and temperature.
import numpy as np
import pandas as pd

# Seed NumPy's global generator so the simulated temperatures are reproducible.
np.random.seed(seed=0)

Cities = ['New York City', 'Philadelphia']

# One row per simulated day (100 days): [day, one temperature per city].
# Values are drawn day by day and, within a day, in the order of `Cities`,
# so the sequence of random draws is fixed for a given seed.
Temperatures_Data = [
    [Day] + [round(np.random.normal(loc=50, scale=12), 2) for _ in Cities]
    for Day in range(1, 101)
]

# Wide format: a 'Day' column followed by one temperature column per city.
df_columns = ['Day', *Cities]
Temperatures_Wide_df = pd.DataFrame(data=Temperatures_Data, columns=df_columns)

# Preview of the first 10 days (rendered when this is the last expression
# of a notebook cell; it has no effect when run as a plain script).
Temperatures_Wide_df.iloc[0:10]
Python code to transform the dataset from the wide format to the long format is shown below:
import numpy as np
import pandas as pd

# Seed NumPy's global generator so the simulated temperatures are reproducible.
np.random.seed(seed=0)

Cities = ['New York City', 'Philadelphia']

# One row per simulated day (100 days): [day, one temperature per city].
# Values are drawn day by day and, within a day, in the order of `Cities`,
# so the sequence of random draws is fixed for a given seed.
Temperatures_Data = [
    [Day] + [round(np.random.normal(loc=50, scale=12), 2) for _ in Cities]
    for Day in range(1, 101)
]

# Wide format: a 'Day' column followed by one temperature column per city.
df_columns = ['Day', *Cities]
Temperatures_Wide_df = pd.DataFrame(data=Temperatures_Data, columns=df_columns)

# NOTE: only the first 10 days are kept before reshaping, so the long table
# below has 20 rows (10 days x 2 cities). Remove this slice to reshape all
# 100 days.
Temperatures_Wide_df = Temperatures_Wide_df.iloc[0:10]

# Wide -> long: 'Day' stays as the identifier column, each city column name
# becomes a value in 'City', and its readings go into 'Temperature'.
# The result lists every row for the first city, then every row for the next.
df2 = pd.melt(
    Temperatures_Wide_df,
    id_vars=['Day'],
    var_name='City',
    value_name='Temperature',
)
print(df2)
Output:-