import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Load the five EdStats CSV files from the EducatifData folder, one DataFrame
# per file. Each file yields a trailing, entirely empty "Unnamed: N" column
# (visible in the .info() summaries further down).
DfCountrySerie= pd.read_csv("/content/drive/MyDrive/Datasets/EducatifData/EdStatsCountry-Series.csv")
DfCountry= pd.read_csv("/content/drive/MyDrive/Datasets/EducatifData/EdStatsCountry.csv")
DfData= pd.read_csv("/content/drive/MyDrive/Datasets/EducatifData/EdStatsData.csv")
DfFootNote= pd.read_csv("/content/drive/MyDrive/Datasets/EducatifData/EdStatsFootNote.csv")
DfSeries= pd.read_csv("/content/drive/MyDrive/Datasets/EducatifData/EdStatsSeries.csv")
To summarize, we want the countries where enrolment in secondary and tertiary education is high enough and where the Internet is available to a large share of the population.
Key Words:
# Report the (rows, columns) shape of four of the loaded tables.
for label, frame in (
    ("CountrySerie:", DfCountrySerie),
    ("Country:", DfCountry),
    ("Data:", DfData),
    ("Series:", DfSeries),
):
    print(label, frame.shape)
CountrySerie: (613, 4) Country: (241, 32) Data: (886930, 70) Series: (3665, 21)
# Summarise DfCountrySerie: column names, non-null counts and dtypes.
DfCountrySerie.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 613 entries, 0 to 612 Data columns (total 4 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 CountryCode 613 non-null object 1 SeriesCode 613 non-null object 2 DESCRIPTION 613 non-null object 3 Unnamed: 3 0 non-null float64 dtypes: float64(1), object(3) memory usage: 19.3+ KB
# Summarise DfCountry: column names, non-null counts and dtypes.
DfCountry.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 241 entries, 0 to 240 Data columns (total 32 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Country Code 241 non-null object 1 Short Name 241 non-null object 2 Table Name 241 non-null object 3 Long Name 241 non-null object 4 2-alpha code 238 non-null object 5 Currency Unit 215 non-null object 6 Special Notes 145 non-null object 7 Region 214 non-null object 8 Income Group 214 non-null object 9 WB-2 code 240 non-null object 10 National accounts base year 205 non-null object 11 National accounts reference year 32 non-null float64 12 SNA price valuation 197 non-null object 13 Lending category 144 non-null object 14 Other groups 58 non-null object 15 System of National Accounts 215 non-null object 16 Alternative conversion factor 47 non-null object 17 PPP survey year 145 non-null object 18 Balance of Payments Manual in use 181 non-null object 19 External debt Reporting status 124 non-null object 20 System of trade 200 non-null object 21 Government Accounting concept 161 non-null object 22 IMF data dissemination standard 181 non-null object 23 Latest population census 213 non-null object 24 Latest household survey 141 non-null object 25 Source of most recent Income and expenditure data 160 non-null object 26 Vital registration complete 111 non-null object 27 Latest agricultural census 142 non-null object 28 Latest industrial data 107 non-null float64 29 Latest trade data 185 non-null float64 30 Latest water withdrawal data 179 non-null object 31 Unnamed: 31 0 non-null float64 dtypes: float64(4), object(28) memory usage: 60.4+ KB
# Summarise DfData: one column per year plus four identifying columns.
DfData.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 886930 entries, 0 to 886929 Data columns (total 70 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Country Name 886930 non-null object 1 Country Code 886930 non-null object 2 Indicator Name 886930 non-null object 3 Indicator Code 886930 non-null object 4 1970 72288 non-null float64 5 1971 35537 non-null float64 6 1972 35619 non-null float64 7 1973 35545 non-null float64 8 1974 35730 non-null float64 9 1975 87306 non-null float64 10 1976 37483 non-null float64 11 1977 37574 non-null float64 12 1978 37576 non-null float64 13 1979 36809 non-null float64 14 1980 89122 non-null float64 15 1981 38777 non-null float64 16 1982 37511 non-null float64 17 1983 38460 non-null float64 18 1984 38606 non-null float64 19 1985 90296 non-null float64 20 1986 39372 non-null float64 21 1987 38641 non-null float64 22 1988 38552 non-null float64 23 1989 37540 non-null float64 24 1990 124405 non-null float64 25 1991 74437 non-null float64 26 1992 75543 non-null float64 27 1993 75793 non-null float64 28 1994 77462 non-null float64 29 1995 131361 non-null float64 30 1996 76807 non-null float64 31 1997 73453 non-null float64 32 1998 84914 non-null float64 33 1999 118839 non-null float64 34 2000 176676 non-null float64 35 2001 123509 non-null float64 36 2002 124205 non-null float64 37 2003 130363 non-null float64 38 2004 128814 non-null float64 39 2005 184108 non-null float64 40 2006 140312 non-null float64 41 2007 137272 non-null float64 42 2008 134387 non-null float64 43 2009 142108 non-null float64 44 2010 242442 non-null float64 45 2011 146012 non-null float64 46 2012 147264 non-null float64 47 2013 137509 non-null float64 48 2014 113789 non-null float64 49 2015 131058 non-null float64 50 2016 16460 non-null float64 51 2017 143 non-null float64 52 2020 51436 non-null float64 53 2025 51436 non-null float64 54 2030 51436 non-null float64 55 2035 51436 non-null float64 56 2040 51436 non-null float64 57 2045 51436 
non-null float64 58 2050 51436 non-null float64 59 2055 51436 non-null float64 60 2060 51436 non-null float64 61 2065 51436 non-null float64 62 2070 51436 non-null float64 63 2075 51436 non-null float64 64 2080 51436 non-null float64 65 2085 51436 non-null float64 66 2090 51436 non-null float64 67 2095 51436 non-null float64 68 2100 51436 non-null float64 69 Unnamed: 69 0 non-null float64 dtypes: float64(66), object(4) memory usage: 473.7+ MB
# Summarise DfFootNote: column names, non-null counts and dtypes.
DfFootNote.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 643638 entries, 0 to 643637 Data columns (total 5 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 CountryCode 643638 non-null object 1 SeriesCode 643638 non-null object 2 Year 643638 non-null object 3 DESCRIPTION 643638 non-null object 4 Unnamed: 4 0 non-null float64 dtypes: float64(1), object(4) memory usage: 24.6+ MB
# Summarise DfSeries: column names, non-null counts and dtypes.
DfSeries.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 3665 entries, 0 to 3664 Data columns (total 21 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Series Code 3665 non-null object 1 Topic 3665 non-null object 2 Indicator Name 3665 non-null object 3 Short definition 2156 non-null object 4 Long definition 3665 non-null object 5 Unit of measure 0 non-null float64 6 Periodicity 99 non-null object 7 Base Period 314 non-null object 8 Other notes 552 non-null object 9 Aggregation method 47 non-null object 10 Limitations and exceptions 14 non-null object 11 Notes from original source 0 non-null float64 12 General comments 14 non-null object 13 Source 3665 non-null object 14 Statistical concept and methodology 23 non-null object 15 Development relevance 3 non-null object 16 Related source links 215 non-null object 17 Other web links 0 non-null float64 18 Related indicators 0 non-null float64 19 License Type 0 non-null float64 20 Unnamed: 20 0 non-null float64 dtypes: float64(6), object(15) memory usage: 601.4+ KB
# Show the first row of every table, each under a banner naming the table.
table_previews = [
    ("|--------Country--------------|>", DfCountry),
    ("|--------CountrySerie----------|>", DfCountrySerie),
    ("|-------------Data-------------|>", DfData),
    ("|----------FootNote------------|>", DfFootNote),
    ("|------------Series----------|>", DfSeries),
]
for banner, frame in table_previews:
    print(banner)
    display(frame.head(1))
|--------Country--------------|>
Country Code | Short Name | Table Name | Long Name | 2-alpha code | Currency Unit | Special Notes | Region | Income Group | WB-2 code | ... | IMF data dissemination standard | Latest population census | Latest household survey | Source of most recent Income and expenditure data | Vital registration complete | Latest agricultural census | Latest industrial data | Latest trade data | Latest water withdrawal data | Unnamed: 31 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | ABW | Aruba | Aruba | Aruba | AW | Aruban florin | SNA data for 2000-2011 are updated from offici... | Latin America & Caribbean | High income: nonOECD | AW | ... | NaN | 2010 | NaN | NaN | Yes | NaN | NaN | 2012.0 | NaN | NaN |
1 rows × 32 columns
|--------CountrySerie----------|>
CountryCode | SeriesCode | DESCRIPTION | Unnamed: 3 | |
---|---|---|---|---|
0 | ABW | SP.POP.TOTL | Data sources : United Nations World Population... | NaN |
|-------------Data-------------|>
Country Name | Country Code | Indicator Name | Indicator Code | 1970 | 1971 | 1972 | 1973 | 1974 | 1975 | ... | 2060 | 2065 | 2070 | 2075 | 2080 | 2085 | 2090 | 2095 | 2100 | Unnamed: 69 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | Arab World | ARB | Adjusted net enrolment rate, lower secondary, ... | UIS.NERA.2 | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
1 rows × 70 columns
|----------FootNote------------|>
CountryCode | SeriesCode | Year | DESCRIPTION | Unnamed: 4 | |
---|---|---|---|---|---|
0 | ABW | SE.PRE.ENRL.FE | YR2001 | Country estimation. | NaN |
|------------Series----------|>
Series Code | Topic | Indicator Name | Short definition | Long definition | Unit of measure | Periodicity | Base Period | Other notes | Aggregation method | ... | Notes from original source | General comments | Source | Statistical concept and methodology | Development relevance | Related source links | Other web links | Related indicators | License Type | Unnamed: 20 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | BAR.NOED.1519.FE.ZS | Attainment | Barro-Lee: Percentage of female population age... | Percentage of female population age 15-19 with... | Percentage of female population age 15-19 with... | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | Robert J. Barro and Jong-Wha Lee: http://www.b... | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
1 rows × 21 columns
# For each table, print the number of missing cells next to the total number
# of cells (missing + present cells is exactly DataFrame.size).
for frame in (DfCountry, DfCountrySerie, DfData, DfFootNote, DfSeries):
    missing_cells = frame.isnull().sum().sum()
    print(missing_cells, " missing values out of", frame.size)
2354 missing values out of 7712 613 missing values out of 2452 53455179 missing values out of 62085100 643638 missing values out of 3218190 55203 missing values out of 76965
With Percentages instead:
# Same figures as a percentage of all cells, rounded to the nearest unit.
for label, frame in (
    ("DfCountry->", DfCountry),
    ("DfCountrySerie->", DfCountrySerie),
    ("DfData->", DfData),
    ("DfFootNote->", DfFootNote),
    ("DfSeries->", DfSeries),
):
    missing_share = frame.isnull().sum().sum() * 100 / frame.size
    print(label, np.round(missing_share), "% of missing data")
DfCountry-> 31.0 % of missing data DfCountrySerie-> 25.0 % of missing data DfData-> 86.0 % of missing data DfFootNote-> 20.0 % of missing data DfSeries-> 72.0 % of missing data
# Distinct indicator names found in DfData, in order of first appearance.
ListOfIndicator = list(DfData["Indicator Name"].unique())
What we want in the analysis: the countries with middle to upper income where the Internet is used, looking at secondary and tertiary education.
# List the column labels of DfCountry. `unique` is a method: without the call
# parentheses the cell only shows the bound-method object, not the labels.
DfCountry.columns.unique()
<bound method Index.unique of Index(['Country Code', 'Short Name', 'Table Name', 'Long Name', '2-alpha code', 'Currency Unit', 'Special Notes', 'Region', 'Income Group', 'WB-2 code', 'National accounts base year', 'National accounts reference year', 'SNA price valuation', 'Lending category', 'Other groups', 'System of National Accounts', 'Alternative conversion factor', 'PPP survey year', 'Balance of Payments Manual in use', 'External debt Reporting status', 'System of trade', 'Government Accounting concept', 'IMF data dissemination standard', 'Latest population census', 'Latest household survey', 'Source of most recent Income and expenditure data', 'Vital registration complete', 'Latest agricultural census', 'Latest industrial data', 'Latest trade data', 'Latest water withdrawal data', 'Unnamed: 31'], dtype='object')>
# Distinct income-group labels (NaN included) used to pick the target group.
DfCountry["Income Group"].unique()
array(['High income: nonOECD', 'Low income', 'Upper middle income', nan, 'Lower middle income', 'High income: OECD'], dtype=object)
# Keep the name and income group of the countries classified as
# 'Upper middle income', then collect their distinct short names.
in_target_group = DfCountry["Income Group"].isin(['Upper middle income'])
DfCountriesWithGoodIncome = DfCountry.loc[in_target_group, ["Short Name", "Income Group"]]
ListOfCountry = DfCountriesWithGoodIncome['Short Name'].unique().tolist()
We remove the columns we do not need:
# Work on a copy of DfData without the columns at positions 4..34
# (the year columns 1970 through 2000).
early_year_columns = DfData.columns[4:35]
Dfs = DfData.drop(columns=early_year_columns).copy()
Dfs.columns
Index(['Country Name', 'Country Code', 'Indicator Name', 'Indicator Code', '2001', '2002', '2003', '2004', '2005', '2006', '2007', '2008', '2009', '2010', '2011', '2012', '2013', '2014', '2015', '2016', '2017', '2020', '2025', '2030', '2035', '2040', '2045', '2050', '2055', '2060', '2065', '2070', '2075', '2080', '2085', '2090', '2095', '2100', 'Unnamed: 69'], dtype='object')
# Drop the two code columns, then trim the year columns by position so that
# only 'Country Name', 'Indicator Name' and the years 2010-2015 remain.
Dfs.drop(columns=["Indicator Code", "Country Code"], inplace=True)
# Positions 2..10 now hold the years 2001-2009.
Dfs.drop(columns=Dfs.columns[2:11], inplace=True)
# Everything from position 8 onwards is 2016 and later, plus 'Unnamed: 69'.
Dfs.drop(columns=Dfs.columns[8:], inplace=True)
Dfs.head(5)
Country Name | Indicator Name | 2010 | 2011 | 2012 | 2013 | 2014 | 2015 | |
---|---|---|---|---|---|---|---|---|
0 | Arab World | Adjusted net enrolment rate, lower secondary, ... | NaN | NaN | NaN | NaN | NaN | NaN |
1 | Arab World | Adjusted net enrolment rate, lower secondary, ... | NaN | NaN | NaN | NaN | NaN | NaN |
2 | Arab World | Adjusted net enrolment rate, lower secondary, ... | NaN | NaN | NaN | NaN | NaN | NaN |
3 | Arab World | Adjusted net enrolment rate, lower secondary, ... | NaN | NaN | NaN | NaN | NaN | NaN |
4 | Arab World | Adjusted net enrolment rate, primary, both sex... | 85.211998 | 85.24514 | 86.101669 | 85.51194 | 85.320152 | NaN |
We now keep only the rows of the data whose country is in the list selected from DfCountry.
# Keep only the rows whose country is in the selected income group.
# The explicit .copy() makes DFTransformed an independent frame, so the later
# in-place dropna()/drop() calls act on it without pandas raising
# SettingWithCopyWarning about modifying a slice of Dfs.
DFTransformed = Dfs[Dfs['Country Name'].isin(ListOfCountry)].copy()
Are there any indicators about the Internet?
# Print every indicator whose name mentions 'Internet'.
for indicator in DFTransformed['Indicator Name'].unique().tolist():
    if 'Internet' in indicator:
        print(indicator)
Internet users (per 100 people)
Are there any indicators about tertiary enrolment?
# Print the indicators whose name contains all of 'tertiary', 'Enrolment'
# and 'sexes' (case-sensitive substring match).
tertiary_terms = ('tertiary', 'Enrolment', 'sexes')
for indicator in DFTransformed['Indicator Name'].unique().tolist():
    if all(term in indicator for term in tertiary_terms):
        print(indicator)
Enrolment in post-secondary non-tertiary education, both sexes (number) Enrolment in post-secondary non-tertiary education, private institutions, both sexes (number) Enrolment in post-secondary non-tertiary education, public institutions, both sexes (number) Enrolment in tertiary education per 100,000 inhabitants, both sexes Enrolment in tertiary education, all programmes, both sexes (number) Enrolment in tertiary education, ISCED 5 programmes, both sexes (number) Enrolment in tertiary education, ISCED 6 programmes, both sexes (number) Enrolment in tertiary education, ISCED 7 programmes, both sexes (number) Enrolment in tertiary education, ISCED 8 programmes, both sexes (number)
Are there any indicators about secondary enrolment?
# Print the indicators whose name contains all of 'secondary', 'Enrolment'
# and 'sexes' (case-sensitive substring match).
secondary_terms = ('secondary', 'Enrolment', 'sexes')
for indicator in DFTransformed['Indicator Name'].unique().tolist():
    if all(term in indicator for term in secondary_terms):
        print(indicator)
Enrolment in Grade 1 of lower secondary general education, both sexes (number) Enrolment in Grade 2 of lower secondary general education, both sexes (number) Enrolment in Grade 3 of lower secondary general education, both sexes (number) Enrolment in Grade 4 of lower secondary general education, both sexes (number) Enrolment in Grade 5 of lower secondary general education, both sexes (number) Enrolment in Grade 6 of lower secondary general education, both sexes (number) Enrolment in lower secondary education, both sexes (number) Enrolment in lower secondary education, private institutions, both sexes (number) Enrolment in lower secondary education, public institutions, both sexes (number) Enrolment in lower secondary general education, Grade unspecified, both sexes (number) Enrolment in lower secondary general, both sexes (number) Enrolment in lower secondary vocational, both sexes (number) Enrolment in post-secondary non-tertiary education, both sexes (number) Enrolment in post-secondary non-tertiary education, private institutions, both sexes (number) Enrolment in post-secondary non-tertiary education, public institutions, both sexes (number) Enrolment in secondary education, both sexes (number) Enrolment in secondary education, private institutions, both sexes (number) Enrolment in secondary education, public institutions, both sexes (number) Enrolment in secondary general, both sexes (number) Enrolment in secondary vocational, both sexes (number) Enrolment in upper secondary education, both sexes (number) Enrolment in upper secondary education, private institutions, both sexes (number) Enrolment in upper secondary education, public institutions, both sexes (number) Enrolment in upper secondary general, both sexes (number) Enrolment in upper secondary vocational, both sexes (number)
The indicators for our study are:
# (rows, columns) of the filtered data at this point.
DFTransformed.shape
(190580, 8)
# Keep only the rows that have a value in every remaining column.
# Rebinding to an explicit copy, rather than dropna(inplace=True) on a
# filtered slice, avoids pandas' SettingWithCopyWarning both here and on the
# in-place drops performed later.
DFTransformed = DFTransformed.dropna().copy()
/usr/local/lib/python3.8/dist-packages/pandas/util/_decorators.py:311: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy return func(*args, **kwargs)
# (rows, columns) of the filtered data at this point.
DFTransformed.shape
(12136, 8)
# Countries that have a row for the Internet-users indicator.
has_internet_indicator = DFTransformed['Indicator Name'] == "Internet users (per 100 people)"
dfCountriesinternet = DFTransformed.loc[has_internet_indicator, 'Country Name'].unique()
len(dfCountriesinternet)
49
# Countries that have a row for the secondary-general enrolment indicator.
has_secondary_indicator = (
    DFTransformed['Indicator Name'] == "Enrolment in secondary general, both sexes (number)"
)
dfCountriesSecondary = DFTransformed.loc[has_secondary_indicator, 'Country Name'].unique()
len(dfCountriesSecondary)
28
# Countries that have a row for the tertiary enrolment (all programmes) indicator.
has_tertiary_indicator = (
    DFTransformed['Indicator Name']
    == "Enrolment in tertiary education, all programmes, both sexes (number)"
)
dfCountriesTertiary = DFTransformed.loc[has_tertiary_indicator, 'Country Name'].unique()
len(dfCountriesTertiary)
22
# Countries with secondary data but no tertiary data.
secondary_set = set(dfCountriesSecondary)
tertiary_set = set(dfCountriesTertiary)
list(secondary_set.difference(tertiary_set))
['Seychelles', 'Ecuador', 'Dominican Republic', 'Peru', 'Costa Rica', 'Suriname', 'Belize']
# Countries with tertiary data but no secondary data.
tertiary_set = set(dfCountriesTertiary)
secondary_set = set(dfCountriesSecondary)
list(tertiary_set.difference(secondary_set))
['Botswana']
# Remove every row of the countries that have secondary data but no tertiary data.
secondary_only = list(set(dfCountriesSecondary) - set(dfCountriesTertiary))
rows_to_remove = DFTransformed.loc[DFTransformed['Country Name'].isin(secondary_only)].index
DFTransformed.drop(index=rows_to_remove, inplace=True)
/usr/local/lib/python3.8/dist-packages/pandas/core/frame.py:4906: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy return super().drop(
# Number of distinct countries still present.
DFTransformed['Country Name'].nunique(dropna=False)
45
# Remove every row of the countries that have tertiary data but no secondary
# data, then count the distinct countries that remain.
tertiary_only = list(set(dfCountriesTertiary) - set(dfCountriesSecondary))
rows_to_remove = DFTransformed.loc[DFTransformed['Country Name'].isin(tertiary_only)].index
DFTransformed.drop(index=rows_to_remove, inplace=True)
DFTransformed['Country Name'].nunique(dropna=False)
44
# Names of the countries still present in DFTransformed.
DFTransformed["Country Name"].unique()
array(['Albania', 'Algeria', 'American Samoa', 'Angola', 'Argentina', 'Azerbaijan', 'Belarus', 'Bosnia and Herzegovina', 'Brazil', 'Bulgaria', 'China', 'Colombia', 'Cuba', 'Dominica', 'Fiji', 'Gabon', 'Grenada', 'Hungary', 'Iraq', 'Jamaica', 'Jordan', 'Kazakhstan', 'Lebanon', 'Libya', 'Malaysia', 'Maldives', 'Marshall Islands', 'Mauritius', 'Mexico', 'Montenegro', 'Namibia', 'Palau', 'Panama', 'Romania', 'Serbia', 'South Africa', 'St. Lucia', 'St. Vincent and the Grenadines', 'Thailand', 'Tonga', 'Tunisia', 'Turkey', 'Turkmenistan', 'Tuvalu'], dtype=object)
# Recompute, on the reduced data, the countries with the Internet-users indicator.
internet_mask = DFTransformed['Indicator Name'] == "Internet users (per 100 people)"
dfCountriesinternet = DFTransformed.loc[internet_mask, 'Country Name'].unique()
len(dfCountriesinternet)
41
# Recompute, on the reduced data, the countries with the secondary-general
# enrolment indicator.
secondary_mask = (
    DFTransformed['Indicator Name'] == "Enrolment in secondary general, both sexes (number)"
)
dfCountriesSecondary = DFTransformed.loc[secondary_mask, 'Country Name'].unique()
len(dfCountriesSecondary)
21
# Recompute, on the reduced data, the countries with the tertiary enrolment
# (all programmes) indicator.
tertiary_mask = (
    DFTransformed['Indicator Name']
    == "Enrolment in tertiary education, all programmes, both sexes (number)"
)
dfCountriesTertiary = DFTransformed.loc[tertiary_mask, 'Country Name'].unique()
len(dfCountriesTertiary)
21
# Peek at the first three remaining rows.
DFTransformed.head(3)
Country Name | Indicator Name | 2010 | 2011 | 2012 | 2013 | 2014 | 2015 | |
---|---|---|---|---|---|---|---|---|
95992 | Albania | Duration of compulsory education (years) | 8.0 | 8.0 | 9.0 | 9.0 | 9.0 | 9.0 |
96467 | Albania | Enrolment in pre-primary education, both sexes... | 74914.0 | 76389.0 | 80488.0 | 81865.0 | 81448.0 | 82494.0 |
96468 | Albania | Enrolment in pre-primary education, female (nu... | 35340.0 | 36021.0 | 38153.0 | 38939.0 | 38767.0 | 39229.0 |
# Countries with Internet data but no secondary enrolment data.
internet_set = set(dfCountriesinternet)
secondary_set = set(dfCountriesSecondary)
list(internet_set.difference(secondary_set))
['Jamaica', 'St. Vincent and the Grenadines', 'Mexico', 'Jordan', 'Tuvalu', 'Argentina', 'Grenada', 'Dominica', 'Marshall Islands', 'Maldives', 'Iraq', 'Montenegro', 'South Africa', 'Tonga', 'Panama', 'Fiji', 'Namibia', 'Gabon', 'Turkmenistan', 'Angola']
# Remove every row of the countries that have Internet data but no secondary
# enrolment data.
internet_only = list(set(dfCountriesinternet) - set(dfCountriesSecondary))
rows_to_remove = DFTransformed.loc[DFTransformed['Country Name'].isin(internet_only)].index
DFTransformed.drop(index=rows_to_remove, inplace=True)
/usr/local/lib/python3.8/dist-packages/pandas/core/frame.py:4906: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy return super().drop(
# Countries with the Internet-users indicator after the last drop.
internet_mask = DFTransformed['Indicator Name'] == "Internet users (per 100 people)"
dfCountriesinternet = DFTransformed.loc[internet_mask, 'Country Name'].unique()
len(dfCountriesinternet)
21
Top 20 countries by Internet users (per 100 people) in 2015:
# Rank the countries by their 2015 value of the Internet-users indicator and
# keep the names of the 20 highest.
internet_rows = DFTransformed[DFTransformed["Indicator Name"] == "Internet users (per 100 people)"]
highest_internet_2015 = internet_rows.nlargest(20, "2015")
Top20Internet = highest_internet_2015["Country Name"].tolist()
print(Top20Internet)
['Azerbaijan', 'Lebanon', 'Kazakhstan', 'Hungary', 'Malaysia', 'Belarus', 'Serbia', 'Bosnia and Herzegovina', 'Albania', 'Brazil', 'Bulgaria', 'Colombia', 'Romania', 'Turkey', 'China', 'Mauritius', 'Tunisia', 'St. Lucia', 'Thailand', 'Algeria']
Top 5 countries by tertiary enrolment in 2015:
# Names of the five countries with the largest 2015 tertiary enrolment
# (all programmes, both sexes, absolute numbers).
tertiary_rows = DFTransformed[
    DFTransformed["Indicator Name"]
    == "Enrolment in tertiary education, all programmes, both sexes (number)"
]
Top5Tertiary = tertiary_rows.nlargest(5, "2015")["Country Name"].tolist()
Top5Tertiary
['China', 'Brazil', 'Turkey', 'Colombia', 'Thailand']
Top 5 countries by secondary enrolment in 2015:
# Names of the five countries with the largest 2015 secondary enrolment
# (both sexes, absolute numbers).
# NOTE(review): this ranks on "Enrolment in secondary education, ..." whereas
# the country lists built earlier use "Enrolment in secondary general, ..." -
# confirm which indicator is intended.
secondary_rows = DFTransformed[
    DFTransformed["Indicator Name"] == "Enrolment in secondary education, both sexes (number)"
]
Top5Secondary = secondary_rows.nlargest(5, "2015")["Country Name"].tolist()
Top5Secondary
['China', 'Brazil', 'Turkey', 'Thailand', 'Colombia']
# Top-5 tertiary countries that are missing from the Internet top 20
# (an empty set means all five are in it).
set(Top5Tertiary).difference(set(Top20Internet))
set()
# Top-5 secondary countries that are missing from the Internet top 20
# (an empty set means all five are in it).
set(Top5Secondary).difference(set(Top20Internet))
set()
Thanks for reading!