# Import the pandas library into our Jupyter notebook
import pandas as pd
# Read the NDHS 2018 Stata (.DTA) file into a pandas DataFrame.
# The path is a raw string so the Windows backslashes are taken literally.
path = r"C:\Users\XXXXXXXXXXX\NDHS 2018\NGIR7AFL.DTA"
DHS_Dataset = pd.read_stata(filepath_or_buffer=path)
Key Insight - NDHS 2018 stands for the Nigeria Demographic and Health Survey conducted in 2018. It is a nationally representative survey carried out about every four years through the collaborative efforts of USAID, the Government of Nigeria, and other development organizations (WHO, UN agencies, etc.).
# Size of the data as (rows, columns), i.e. (observations, variables/features)
DHS_Dataset.shape
# Name and dtype of the v190 column
DHS_Dataset["v190"].name
DHS_Dataset["v190"].dtype
# Collapse the five labels of v190 into three groups:
#   poorest, poorer -> poor
#   middle          -> middle
#   richer, richest -> rich
merge_v190 = {
    "v190": {
        "poorest": "poor",
        "poorer": "poor",
        "middle": "middle",
        "richer": "rich",
        "richest": "rich",
    }
}


def _recode_labels(column, mapping):
    """Return a copy of *column* with its labels translated through *mapping*.

    Labels that are not keys of *mapping* (including missing values) are
    kept unchanged. If *column* is categorical, the result is categorical
    too: the new categories are the translated old categories in their
    original order with duplicates dropped, and the ordered flag is kept.
    """
    recoded = column.astype(object).map(lambda label: mapping.get(label, label))
    if isinstance(column.dtype, pd.CategoricalDtype):
        # dict.fromkeys de-duplicates while preserving first-seen order,
        # e.g. poorest/poorer -> a single 'poor' category.
        categories = list(
            dict.fromkeys(mapping.get(cat, cat) for cat in column.cat.categories)
        )
        recoded = recoded.astype(
            pd.CategoricalDtype(categories, ordered=column.cat.ordered)
        )
    return recoded


# Recode only the v190 column rather than calling DataFrame.replace on the
# whole frame: merging categories through replace() on a categorical column
# is deprecated in pandas 2.2+, so the labels are mapped explicitly instead.
DHS_Dataset["v190"] = _recode_labels(DHS_Dataset["v190"], merge_v190["v190"])
DHS_Dataset["v190"].value_counts()