import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Load the 2023 global population table from disk.
df = pd.read_csv('Data/Global_Population_2023.csv')
df.head()  # preview of the first rows (only rendered in an interactive session)

# 'Population 2023' arrives as comma-grouped text (e.g. '1,428,627,663');
# strip the separators before casting to int so it sorts numerically.
df['Population 2023'] = (
    df['Population 2023']
    .str.replace(',', '')
    .astype(int)
)

# Ten most populous rows, largest first; reused by later charts.
top_10_countries = df.sort_values(by="Population 2023", ascending=False).iloc[:10]
# Bar chart: populations of the ten most populous countries.
fig, ax = plt.subplots(figsize=(10, 6))

top_10_names = top_10_countries['Country/Dependency']
top_10_population = top_10_countries["Population 2023"]
ax.bar(top_10_names, top_10_population, color='skyblue')

# Pin one tick per bar, then tilt the country names so they do not overlap.
ax.set_xticks(range(len(top_10_countries)))
ax.set_xticklabels(top_10_names, rotation=45, ha='right')

# Title and axis captions.
ax.set_ylabel('Population (Billions)', fontsize=12)
ax.set_xlabel('Country', fontsize=12)
ax.set_title('Top 10 Most Populous Countries in 2023', fontsize=14)

# Re-flow the figure so the rotated labels are not clipped.
fig.tight_layout()

# Bar chart: the three countries with the highest 'Net Migrants' value.
top_countries = df.sort_values(by='Net Migrants', ascending=False).iloc[:3]

fig, ax = plt.subplots(figsize=(10, 6))
ax.bar(
    top_countries['Country/Dependency'],
    top_countries['Net Migrants'],
    color='green',
)

# Axis captions and title.
ax.set_xlabel('Country')
ax.set_ylabel('Net Migration')
ax.set_title('Top 3 Countries with the Largest Net Migration')

# Display the open figure(s).
plt.show()

# Pie chart: world population share of the five most populous countries,
# with every other country folded into a single "Other" slice.

# 'World Population Share %' is text such as '17.76 %'; drop the percent sign
# and surrounding whitespace so the values can be summed as floats.
df['World Population Share %'] = df['World Population Share %'].str.replace('%', '').str.strip().astype(float)

# Five largest populations. This statement must sit at column 0: a stray
# leading indent here is an IndentationError that stops the file from compiling.
top_5_countries = df.sort_values('Population 2023', ascending=False).head(5)

# Whatever share the top five do not account for belongs to the rest of the world.
other_share = 100 - (top_5_countries['World Population Share %'].sum())

# Add the "Other" category
countries = list(top_5_countries['Country/Dependency']) + ['Other']
population_share = list(top_5_countries['World Population Share %']) + [other_share]

# Plotting the pie chart
fig, ax = plt.subplots(figsize=(8, 8))
ax.pie(population_share, labels=countries, autopct='%1.1f%%', startangle=140, colors=plt.cm.Paired.colors)
ax.set_title('World Population Share of Top 5 Countries (2023)', fontsize=14)

# Show the plot
plt.show()

# Scatter plot: land area on the x-axis against 2023 population on the y-axis,
# one point per row of df.
land_area = df['Land Area (Km²)']
population_2023 = df['Population 2023']

fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(land_area, population_2023, color='blue', alpha=0.7)

# Axis captions and title.
ax.set_xlabel('Land Area (Km²)', fontsize=12)
ax.set_ylabel('Population (2023)', fontsize=12)
ax.set_title('Relationship Between Population and Land Area (2023)', fontsize=14)

# Render the figure.
plt.show()

# Bucket countries by median age and compare the mean fertility rate per bucket.
# pd.cut uses right-closed intervals by default, so the buckets are
# (0, 20], (20, 30], (30, 40] and (40, inf].
# The last edge is open-ended so that it matches its '41+' label; with an
# upper edge of 50, any median age above 50 was left unbinned (NaN) and
# silently excluded from the averages below.
bins = [0, 20, 30, 40, float('inf')]
labels = ['0-20', '21-30', '31-40', '41+']
df['Age Group'] = pd.cut(df['Median Age'], bins=bins, labels=labels)

# Calculate average fertility rate for each age group.
# observed=True keeps only the age groups that actually occur in the data.
average_fertility_by_age_group = df.groupby('Age Group', observed=True)['Fertility Rate'].mean()

# Plotting the results
fig, ax = plt.subplots(figsize=(10, 6))
average_fertility_by_age_group.plot(kind='bar', color='pink', ax=ax)
ax.set_title('Average Fertility Rate by Age Group', fontsize=14)
ax.set_xlabel('Age Group', fontsize=12)
ax.set_ylabel('Average Fertility Rate', fontsize=12)

plt.show()

# Histogram: distribution of the 'Median Age' column across all rows, in 15 bins.
median_ages = df['Median Age']

fig, ax = plt.subplots(figsize=(10, 6))
ax.hist(median_ages, bins=15, alpha=0.7, edgecolor='black')

# Axis captions and title.
ax.set_xlabel('Median Age')
ax.set_ylabel('Frequency')
ax.set_title('Distribution of Median Ages Across Countries')

# Render the figure.
plt.show()

# Grouped bar chart: 2023 population next to net population change for the
# ten most populous countries.
# NOTE(review): both series share one y-axis; in the sample rows population is
# on the order of 1e9 while net change is on the order of 1e7, so the red bars
# may be barely visible. Confirm whether a secondary axis is wanted.
fig, ax = plt.subplots(figsize=(12, 8))

# Each country gets two bars of this width, drawn side by side.
bar_width = 0.35
index = range(len(top_10_countries))
net_change_positions = [left + bar_width for left in index]
tick_centers = [left + bar_width / 2 for left in index]

# Left bar of each pair: population.
ax.bar(
    index,
    top_10_countries['Population 2023'],
    bar_width,
    color='blue',
    label='Population 2023',
)

# Right bar of each pair: net population change, shifted by one bar width.
ax.bar(
    net_change_positions,
    top_10_countries['Net Population Change'],
    bar_width,
    color='red',
    label='Net Population Change',
)

# Title, axis captions, ticks centred between the two bars, and legend.
ax.set_title('Population and Net Population Change for Top 10 Countries')
ax.set_xlabel('Country')
ax.set_ylabel('Value')
ax.set_xticks(tick_centers)
ax.set_xticklabels(top_10_countries['Country/Dependency'], rotation=45, ha='right')
ax.legend()

# Re-flow the layout for the rotated labels, then render.
fig.tight_layout()
plt.show()

# Histogram: distribution of fertility rates across all rows, with the mean
# marked by a dashed vertical line.
average_fertility_rate = df['Fertility Rate'].mean()
fig, ax = plt.subplots(figsize=(10, 6))
ax.hist(df["Fertility Rate"], bins=20, color='skyblue', edgecolor='black', alpha=0.7)

# Add a vertical line for the average fertility rate
ax.axvline(average_fertility_rate, color='red', linestyle='--', linewidth=1.5, label=f'Average Fertility Rate: {average_fertility_rate:.2f}')

# Adding title and labels
ax.set_title('Histogram of Fertility Rates Across All Countries')
ax.set_xlabel('Fertility Rate')
ax.set_ylabel('Frequency')

# The label passed to axvline is only rendered once a legend is drawn;
# without this call the average value never appears on the chart.
ax.legend()

# Show plot
plt.show()

	Unnamed: 0	Country/Dependency	Population 2023	Yearly Population Change	Net Population Change	Population Density (P/Km²)	Land Area (Km²)	Net Migrants	Fertility Rate	Median Age	Urban Population %	World Population Share %
0	0	India	1,428,627,663	0.81 %	11454490	481	2973190	-486136	2.0	28.0	36 %	17.76 %
1	1	China	1,425,671,352	-0.02 %	-215985	152	9388211	-310220	1.2	39.0	65 %	17.72 %
2	2	United States	339,996,563	0.50 %	1706706	37	9147420	999700	1.7	38.0	83 %	4.23 %
3	3	Indonesia	277,534,122	0.74 %	2032783	153	1811570	-49997	2.1	30.0	59 %	3.45 %
4	4	Pakistan	240,485,658	1.98 %	4660796	312	770880	-165988	3.3	21.0	35 %	2.99 %

Activities

1. Plot a bar chart showing the population of the top 10 most populous countries.

2. Find the top 3 countries with the largest net migration and visualize them using a bar chart.

3. Create a pie chart visualizing the World Population Share of the Top 5 Countries in 2023.

4. Create a scatter plot showing the relationship between population and land area.

5. Compare fertility rates across countries by similar median ages to see how fertility rates vary.

6. Visualize Median Age Distribution Across All Countries through a Histogram.

7. Display Population and Net Population Change for Top 10 Countries Using a Grouped bar chart.

8. Create a histogram of the fertility rates across all countries.

Activities

1. Plot a bar chart showing the population of the top 10 most populous countries.

2. Find the top 3 countries with the largest net migration and visualize them using a bar chart.

3. Create a pie chart visualizing the World Population Share of the Top 5 Countries in 2023.

4. Create a scatter plot showing the relationship between population and land area.

5. Compare fertility rates across countries by similar median ages to see how fertility rates vary.

6. Visualize Median Age Distribution Across All Countries through a Histogram.

7. Display Population and Net Population Change for Top 10 Countries Using a Grouped bar chart.

8. Create a histogram of the fertility rates across all countries.