In [2]:
import pandas as pd
import seaborn as sns
from matplotlib import pyplot
# Read familyxx.csv into the NHIS DataFrame, then print a plain-text preview
# of its first five and last five rows.
NHIS = pd.read_csv('/Users/bonnymcclain/Desktop/familyxx.csv')
for preview in (NHIS.head(5), NHIS.tail(5)):
    print(preview)
   Year Completed  FMX  RECTYPE  SRVY_YR  HHX  FM_SIZE  FM_STRCP  FM_TYPE  \
0            2018    1       60     2018    1        1        11        1   
1            2018    1       60     2018    4        3        41        4   
2            2018    1       60     2018    6        4        41        4   
3            2018    1       60     2018    8        3        41        4   
4            2018    1       60     2018    9        1        11        1   

   FM_STRP  CURWRKN  ...  COVCONF  FHICOST  FMEDBILL  FMEDBPAY  FMEDBNOP  \
0       11      1.0  ...      NaN        1         2         2       NaN   
1       41      2.0  ...      4.0        2         1         1       1.0   
2       41      2.0  ...      1.0        2         2         2       NaN   
3       41      2.0  ...      3.0        3         2         1       NaN   
4       11      2.0  ...      NaN        2         1         1       1.0   

   FSAF  FHICOVCT  FHICOVYN  FPRCOOH  FHIEBCCT  
0     2         1         1      NaN       NaN  
1     2         3         1      2.0       3.0  
2     1         4         1      2.0       4.0  
3     2         3         1      2.0       3.0  
4     2         1         1      2.0       0.0  

[5 rows x 125 columns]
       Year Completed  FMX  RECTYPE  SRVY_YR    HHX  FM_SIZE  FM_STRCP  \
30304            2018    3       60     2018  55556        1        12   
30305            2018    1       60     2018  55557        1        11   
30306            2018    1       60     2018  55560        1        11   
30307            2018    1       60     2018  55562        2        21   
30308            2018    1       60     2018  55563        1        11   

       FM_TYPE  FM_STRP  CURWRKN  ...  COVCONF  FHICOST  FMEDBILL  FMEDBPAY  \
30304        1       12      2.0  ...      1.0        1         2         2   
30305        1       11      1.0  ...      NaN        2         1         2   
30306        1       11      2.0  ...      4.0        5         1         1   
30307        2       21      1.0  ...      2.0        2         2         2   
30308        1       11      2.0  ...      NaN        1         2         2   

       FMEDBNOP  FSAF  FHICOVCT  FHICOVYN  FPRCOOH  FHIEBCCT  
30304       NaN     2         1         1      NaN       1.0  
30305       1.0     2         0         2      NaN       NaN  
30306       2.0     2         1         1      2.0       1.0  
30307       NaN     2         2         1      2.0       2.0  
30308       NaN     2         1         1      2.0       0.0  

[5 rows x 125 columns]
In [3]:
# Check how pandas is storing the data: index range, column count, dtype
# breakdown and memory usage of the NHIS frame.
# The frame loaded in this notebook is named NHIS; `df` is never defined, so
# the previous `df.info()` only worked with leftover kernel state.
# DataFrame.info() prints its own report and returns None, so it is called
# directly instead of being wrapped in print() (which appended a stray "None").
NHIS.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 30309 entries, 0 to 30308
Columns: 125 entries, Year Completed to FHIEBCCT
dtypes: float64(24), int64(101)
memory usage: 28.9 MB
None
In [3]:
# Display the column labels of NHIS as the cell's output (pandas abbreviates
# the long listing with "...").
NHIS.columns
Out[3]:
Index(['Year Completed', 'FMX', 'RECTYPE', 'SRVY_YR', 'HHX', 'FM_SIZE',
       'FM_STRCP', 'FM_TYPE', 'FM_STRP', 'CURWRKN',
       ...
       'COVCONF', 'FHICOST', 'FMEDBILL', 'FMEDBPAY', 'FMEDBNOP', 'FSAF',
       'FHICOVCT', 'FHICOVYN', 'FPRCOOH', 'FHIEBCCT'],
      dtype='object', length=125)
In [10]:
# Replace a handful of coded column labels with more readable names.
# NOTE(review): 'TELCELN' is not visible in the abbreviated column listing;
# rename() skips labels that are absent by default, so confirm it exists.
readable_labels = {
    'FMX': 'family_number',
    'RECTYPE': 'Family',
    'HHX': 'Household',
    'TELCELN': 'working_mobile',
}
# The rename is applied in place, so NHIS itself is modified.
NHIS.rename(columns=readable_labels, inplace=True)
print(NHIS.columns)
Index(['Year Completed', 'family_number', 'Family', 'SRVY_YR', 'Household',
       'FM_SIZE', 'FM_STRCP', 'FM_TYPE', 'FM_STRP', 'CURWRKN',
       ...
       'COVCONF', 'FHICOST', 'FMEDBILL', 'FMEDBPAY', 'FMEDBNOP', 'FSAF',
       'FHICOVCT', 'FHICOVYN', 'FPRCOOH', 'FHIEBCCT'],
      dtype='object', length=125)
In [11]:
import sys
!{sys.executable} -m pip install pandas-profiling
Requirement already satisfied: pandas-profiling in /anaconda3/lib/python3.7/site-packages (2.3.0)
Requirement already satisfied: confuse>=1.0.0 in /anaconda3/lib/python3.7/site-packages (from pandas-profiling) (1.0.0)
Requirement already satisfied: htmlmin>=0.1.12 in /anaconda3/lib/python3.7/site-packages (from pandas-profiling) (0.1.12)
Requirement already satisfied: phik>=0.9.8 in /anaconda3/lib/python3.7/site-packages (from pandas-profiling) (0.9.8)
Requirement already satisfied: jinja2>=2.8 in /anaconda3/lib/python3.7/site-packages (from pandas-profiling) (2.10.1)
Requirement already satisfied: astropy in /anaconda3/lib/python3.7/site-packages (from pandas-profiling) (3.2.1)
Requirement already satisfied: pandas>=0.19 in /anaconda3/lib/python3.7/site-packages (from pandas-profiling) (0.24.2)
Requirement already satisfied: matplotlib>=1.4 in /anaconda3/lib/python3.7/site-packages (from pandas-profiling) (3.1.0)
Requirement already satisfied: missingno>=0.4.2 in /anaconda3/lib/python3.7/site-packages (from pandas-profiling) (0.4.2)
Requirement already satisfied: pyyaml in /anaconda3/lib/python3.7/site-packages (from confuse>=1.0.0->pandas-profiling) (5.1.1)
Requirement already satisfied: pytest>=4.0.2 in /anaconda3/lib/python3.7/site-packages (from phik>=0.9.8->pandas-profiling) (5.0.1)
Requirement already satisfied: numpy>=1.15.4 in /anaconda3/lib/python3.7/site-packages (from phik>=0.9.8->pandas-profiling) (1.16.4)
Requirement already satisfied: nbconvert>=5.3.1 in /anaconda3/lib/python3.7/site-packages (from phik>=0.9.8->pandas-profiling) (5.5.0)
Requirement already satisfied: jupyter-client>=5.2.3 in /anaconda3/lib/python3.7/site-packages (from phik>=0.9.8->pandas-profiling) (5.3.1)
Requirement already satisfied: numba>=0.38.1 in /anaconda3/lib/python3.7/site-packages (from phik>=0.9.8->pandas-profiling) (0.44.1)
Requirement already satisfied: pytest-pylint>=0.13.0 in /anaconda3/lib/python3.7/site-packages (from phik>=0.9.8->pandas-profiling) (0.14.1)
Requirement already satisfied: scipy>=1.1.0 in /anaconda3/lib/python3.7/site-packages (from phik>=0.9.8->pandas-profiling) (1.3.0)
Requirement already satisfied: MarkupSafe>=0.23 in /anaconda3/lib/python3.7/site-packages (from jinja2>=2.8->pandas-profiling) (1.1.1)
Requirement already satisfied: python-dateutil>=2.5.0 in /anaconda3/lib/python3.7/site-packages (from pandas>=0.19->pandas-profiling) (2.8.0)
Requirement already satisfied: pytz>=2011k in /anaconda3/lib/python3.7/site-packages (from pandas>=0.19->pandas-profiling) (2019.1)
Requirement already satisfied: cycler>=0.10 in /anaconda3/lib/python3.7/site-packages (from matplotlib>=1.4->pandas-profiling) (0.10.0)
Requirement already satisfied: kiwisolver>=1.0.1 in /anaconda3/lib/python3.7/site-packages (from matplotlib>=1.4->pandas-profiling) (1.1.0)
Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.1 in /anaconda3/lib/python3.7/site-packages (from matplotlib>=1.4->pandas-profiling) (2.4.0)
Requirement already satisfied: seaborn in /anaconda3/lib/python3.7/site-packages (from missingno>=0.4.2->pandas-profiling) (0.9.0)
Requirement already satisfied: py>=1.5.0 in /anaconda3/lib/python3.7/site-packages (from pytest>=4.0.2->phik>=0.9.8->pandas-profiling) (1.8.0)
Requirement already satisfied: packaging in /anaconda3/lib/python3.7/site-packages (from pytest>=4.0.2->phik>=0.9.8->pandas-profiling) (19.0)
Requirement already satisfied: attrs>=17.4.0 in /anaconda3/lib/python3.7/site-packages (from pytest>=4.0.2->phik>=0.9.8->pandas-profiling) (19.1.0)
Requirement already satisfied: more-itertools>=4.0.0 in /anaconda3/lib/python3.7/site-packages (from pytest>=4.0.2->phik>=0.9.8->pandas-profiling) (7.0.0)
Requirement already satisfied: atomicwrites>=1.0 in /anaconda3/lib/python3.7/site-packages (from pytest>=4.0.2->phik>=0.9.8->pandas-profiling) (1.3.0)
Requirement already satisfied: pluggy<1.0,>=0.12 in /anaconda3/lib/python3.7/site-packages (from pytest>=4.0.2->phik>=0.9.8->pandas-profiling) (0.12.0)
Requirement already satisfied: importlib-metadata>=0.12 in /anaconda3/lib/python3.7/site-packages (from pytest>=4.0.2->phik>=0.9.8->pandas-profiling) (0.17)
Requirement already satisfied: wcwidth in /anaconda3/lib/python3.7/site-packages (from pytest>=4.0.2->phik>=0.9.8->pandas-profiling) (0.1.7)
Requirement already satisfied: bleach in /anaconda3/lib/python3.7/site-packages (from nbconvert>=5.3.1->phik>=0.9.8->pandas-profiling) (3.1.0)
Requirement already satisfied: jupyter-core in /anaconda3/lib/python3.7/site-packages (from nbconvert>=5.3.1->phik>=0.9.8->pandas-profiling) (4.5.0)
Requirement already satisfied: pygments in /anaconda3/lib/python3.7/site-packages (from nbconvert>=5.3.1->phik>=0.9.8->pandas-profiling) (2.4.2)
Requirement already satisfied: mistune>=0.8.1 in /anaconda3/lib/python3.7/site-packages (from nbconvert>=5.3.1->phik>=0.9.8->pandas-profiling) (0.8.4)
Requirement already satisfied: pandocfilters>=1.4.1 in /anaconda3/lib/python3.7/site-packages (from nbconvert>=5.3.1->phik>=0.9.8->pandas-profiling) (1.4.2)
Requirement already satisfied: entrypoints>=0.2.2 in /anaconda3/lib/python3.7/site-packages (from nbconvert>=5.3.1->phik>=0.9.8->pandas-profiling) (0.3)
Requirement already satisfied: testpath in /anaconda3/lib/python3.7/site-packages (from nbconvert>=5.3.1->phik>=0.9.8->pandas-profiling) (0.3.1)
Requirement already satisfied: traitlets>=4.2 in /anaconda3/lib/python3.7/site-packages (from nbconvert>=5.3.1->phik>=0.9.8->pandas-profiling) (4.3.2)
Requirement already satisfied: nbformat>=4.4 in /anaconda3/lib/python3.7/site-packages (from nbconvert>=5.3.1->phik>=0.9.8->pandas-profiling) (4.4.0)
Requirement already satisfied: defusedxml in /anaconda3/lib/python3.7/site-packages (from nbconvert>=5.3.1->phik>=0.9.8->pandas-profiling) (0.6.0)
Requirement already satisfied: tornado>=4.1 in /anaconda3/lib/python3.7/site-packages (from jupyter-client>=5.2.3->phik>=0.9.8->pandas-profiling) (5.1.1)
Requirement already satisfied: pyzmq>=13 in /anaconda3/lib/python3.7/site-packages (from jupyter-client>=5.2.3->phik>=0.9.8->pandas-profiling) (18.0.0)
Requirement already satisfied: llvmlite>=0.29.0 in /anaconda3/lib/python3.7/site-packages (from numba>=0.38.1->phik>=0.9.8->pandas-profiling) (0.29.0)
Requirement already satisfied: pylint>=1.4.5 in /anaconda3/lib/python3.7/site-packages (from pytest-pylint>=0.13.0->phik>=0.9.8->pandas-profiling) (2.3.1)
Requirement already satisfied: six in /anaconda3/lib/python3.7/site-packages (from pytest-pylint>=0.13.0->phik>=0.9.8->pandas-profiling) (1.12.0)
Requirement already satisfied: setuptools in /anaconda3/lib/python3.7/site-packages (from kiwisolver>=1.0.1->matplotlib>=1.4->pandas-profiling) (41.0.1)
Requirement already satisfied: zipp>=0.5 in /anaconda3/lib/python3.7/site-packages (from importlib-metadata>=0.12->pytest>=4.0.2->phik>=0.9.8->pandas-profiling) (0.5.1)
Requirement already satisfied: webencodings in /anaconda3/lib/python3.7/site-packages (from bleach->nbconvert>=5.3.1->phik>=0.9.8->pandas-profiling) (0.5.1)
Requirement already satisfied: ipython-genutils in /anaconda3/lib/python3.7/site-packages (from traitlets>=4.2->nbconvert>=5.3.1->phik>=0.9.8->pandas-profiling) (0.2.0)
Requirement already satisfied: decorator in /anaconda3/lib/python3.7/site-packages (from traitlets>=4.2->nbconvert>=5.3.1->phik>=0.9.8->pandas-profiling) (4.4.0)
Requirement already satisfied: jsonschema!=2.5.0,>=2.4 in /anaconda3/lib/python3.7/site-packages (from nbformat>=4.4->nbconvert>=5.3.1->phik>=0.9.8->pandas-profiling) (3.0.1)
Requirement already satisfied: astroid<3,>=2.2.0 in /anaconda3/lib/python3.7/site-packages (from pylint>=1.4.5->pytest-pylint>=0.13.0->phik>=0.9.8->pandas-profiling) (2.2.5)
Requirement already satisfied: isort<5,>=4.2.5 in /anaconda3/lib/python3.7/site-packages (from pylint>=1.4.5->pytest-pylint>=0.13.0->phik>=0.9.8->pandas-profiling) (4.3.21)
Requirement already satisfied: mccabe<0.7,>=0.6 in /anaconda3/lib/python3.7/site-packages (from pylint>=1.4.5->pytest-pylint>=0.13.0->phik>=0.9.8->pandas-profiling) (0.6.1)
Requirement already satisfied: pyrsistent>=0.14.0 in /anaconda3/lib/python3.7/site-packages (from jsonschema!=2.5.0,>=2.4->nbformat>=4.4->nbconvert>=5.3.1->phik>=0.9.8->pandas-profiling) (0.14.11)
Requirement already satisfied: lazy-object-proxy in /anaconda3/lib/python3.7/site-packages (from astroid<3,>=2.2.0->pylint>=1.4.5->pytest-pylint>=0.13.0->phik>=0.9.8->pandas-profiling) (1.4.1)
Requirement already satisfied: wrapt in /anaconda3/lib/python3.7/site-packages (from astroid<3,>=2.2.0->pylint>=1.4.5->pytest-pylint>=0.13.0->phik>=0.9.8->pandas-profiling) (1.11.2)
Requirement already satisfied: typed-ast>=1.3.0; implementation_name == "cpython" in /anaconda3/lib/python3.7/site-packages (from astroid<3,>=2.2.0->pylint>=1.4.5->pytest-pylint>=0.13.0->phik>=0.9.8->pandas-profiling) (1.4.0)
In [ ]:
 
In [12]:
import pandas_profiling

# Re-read the raw CSV and build a profiling report for it.
# NOTE(review): this re-read rebinds NHIS to the file as stored on disk, so the
# in-place column renames applied earlier in the notebook are not present in
# the profiled frame. Confirm that is intended.
NHIS = pd.read_csv('/Users/bonnymcclain/Desktop/familyxx.csv')
nhis_profile_report = pandas_profiling.ProfileReport(NHIS)

# Leaving the report as the last expression makes the notebook render it.
nhis_profile_report