Permutation Importance and Ensemble Experiments in Fastai Tabular for the Homesite Competition
Here I borrow generously from Zach's notebook
Introduction
Learning from WalkWithFastai's lesson on permutation importance and ensemble techniques, we apply some of those techniques to the Homesite competition data.
Notes:
- Changed the categorize functions from the last notebook to exclude any columns in y_names from being evaluated, since the dependent variable shouldn't be fed to the model as an input feature
!pip install -Uqq fastai
!pip install kaggle
from fastai.tabular.all import *
from kaggle import api
Path.cwd()
!touch .gitignore
!echo "_data" > .gitignore
!mkdir _data
os.chdir('_data')
Path.cwd()
path = Path.cwd()/"homesite_competition_data"
path.mkdir(exist_ok=True)
Path.BASE_PATH = path
api.competition_download_cli('homesite-quote-conversion', path=path)
file_extract(path/"homesite-quote-conversion.zip")
file_extract(path/"train.csv.zip")
file_extract(path/"test.csv.zip")
path.ls()
random_seed = 42
bs = 4096
val_bs = 512
test_size = 0.3
epochs = 3
lr = 1e-2
wd=0.002
layers = [10000,500]
dropout = [0.001, 0.01]
y_block=CategoryBlock()
emb_dropout=0.02
set_seed(random_seed)
df_train = pd.read_csv(path/"train.csv", low_memory=False)
df_train.head(2)
df_train.shape
df_test = pd.read_csv(path/"test.csv", low_memory=False)
df_test.head(2)
df_test.shape
y_column = df_train.columns.difference(df_test.columns)
y_column
df_train.QuoteConversion_Flag = df_train.QuoteConversion_Flag.astype(dtype='boolean')
train_data_balance = pd.DataFrame(df_train["QuoteConversion_Flag"]).groupby("QuoteConversion_Flag")
train_data_balance["QuoteConversion_Flag"].describe()
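As a quicker alternative check (not in the original notebook), value_counts with normalize=True reads off the class fractions directly:

# Fraction of quotes in each class of QuoteConversion_Flag
df_train["QuoteConversion_Flag"].value_counts(normalize=True)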
Adding Tim's bits of insight
df_train = df_train.set_index('QuoteNumber')
df_test = df_test.set_index('QuoteNumber')
df_train['Original_Quote_Date'] = pd.to_datetime(df_train['Original_Quote_Date'])
df_test['Original_Quote_Date'] = pd.to_datetime(df_test['Original_Quote_Date'])
df_train = add_datepart(df_train, 'Original_Quote_Date')
df_test = add_datepart(df_test, 'Original_Quote_Date')
y_names = [y_column[0]]
cont_names, cat_names = cont_cat_split(df_train, dep_var=y_names)
len(cont_names), len(cat_names)
triage = L()
def reassign_to_categorical(field, df, y_names, continuous, categorical, triage):
    # Only convert complete columns that aren't the dependent variable
    if (df[field].isna().sum() == 0) and (field not in y_names):
        field_categories = df[field].unique()
        df[field] = df[field].astype('category')
        # cat.set_categories(..., inplace=True) is deprecated; reassign instead
        df[field] = df[field].cat.set_categories(field_categories)
        if field in continuous: continuous.remove(field)
        if field not in categorical: categorical.append(field)
    else:
        # Columns with missing values (or the y column) get set aside for triage
        if field in continuous: continuous.remove(field)
        if field in categorical: categorical.remove(field)
        triage.append(field)
    return df, continuous, categorical, triage
def categorize(df, y_names, cont_names, cat_names, triage, category_threshold):
    # Any low-cardinality column that isn't already categorical is a candidate
    for field in df.columns:
        if (len(df[field].unique()) <= category_threshold) and not isinstance(df[field].dtype, pd.CategoricalDtype):
            reassign_to_categorical(field, df, y_names, cont_names, cat_names, triage)
    return df, cont_names, cat_names, triage
df_train, cont_names, cat_names, triage = categorize(df_train, y_names, cont_names, cat_names, triage, 100)
"QuoteConversion_Flag" in cont_names, "QuoteConversion_Flag" in cat_names #Make sure we've gotten our y-column excluded
procs = [Categorify, FillMissing, Normalize]
splits = TrainTestSplitter(test_size=test_size, stratify=df_train[y_names])(df_train)
to = TabularPandas(df=df_train, procs=procs, cat_names=cat_names,
cont_names=cont_names, y_names=y_names,splits=splits,
y_block=y_block)
# layers and dropout are model settings, not dataloader settings; they move to tabular_learner below
dls = to.dataloaders(bs=bs, val_bs=val_bs)
dls.valid.show_batch()
config = tabular_config(ps=dropout, embed_p=emb_dropout)
learn = tabular_learner(dls, layers=layers, config=config, metrics=accuracy)
learn.lr_find(suggest_funcs=(valley, slide, minimum, steep))
learn.fit_one_cycle(epochs,lr, wd=wd)
preds, targs = learn.get_preds()
accuracy(preds,targs)
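Since the competition itself is scored on ROC AUC rather than accuracy, it can be worth checking that as well. A minimal sketch with sklearn, assuming column 1 of preds holds the probability of conversion:

from sklearn.metrics import roc_auc_score

# AUC on the validation set; column 1 is P(QuoteConversion_Flag == 1)
roc_auc_score(targs.flatten().numpy(), preds[:, 1].numpy())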
class PermutationImportance():
    "Calculate and plot the permutation importance"
    def __init__(self, learn:Learner, df=None, bs=None):
        "Initialize with a learner, an optional dataframe, and an optional batch size"
        self.learn = learn
        self.df = df
        bs = bs if bs is not None else learn.dls.bs
        self.dl = learn.dls.test_dl(self.df, bs=bs) if self.df is not None else learn.dls[1]
        self.x_names = learn.dls.x_names.filter(lambda x: '_na' not in x)
        self.na = learn.dls.x_names.filter(lambda x: '_na' in x)
        self.y = learn.dls.y_names
        self.results = self.calc_feat_importance()
        self.plot_importance(self.ord_dic_to_df(self.results))

    def measure_col(self, name:str):
        "Measures change in the metric after a column shuffle"
        col = [name]
        # Shuffle the matching _na indicator column together with its source column
        if f'{name}_na' in self.na: col.append(f'{name}_na')
        orig = self.dl.items[col].values
        perm = np.random.permutation(len(orig))
        self.dl.items[col] = self.dl.items[col].values[perm]
        metric = self.learn.validate(dl=self.dl)[1]
        self.dl.items[col] = orig  # restore the unshuffled values
        return metric

    def calc_feat_importance(self):
        "Calculates permutation importance by shuffling each column in turn"
        print('Getting base error')
        base_error = self.learn.validate(dl=self.dl)[1]
        self.importance = {}
        pbar = progress_bar(self.x_names)
        print('Calculating Permutation Importance')
        for col in pbar:
            self.importance[col] = self.measure_col(col)
        for key, value in self.importance.items():
            # Relative drop in the metric after shuffling; this scaling can be adjusted
            self.importance[key] = (base_error - value) / base_error
        return OrderedDict(sorted(self.importance.items(), key=lambda kv: kv[1], reverse=True))

    def ord_dic_to_df(self, d:OrderedDict):
        return pd.DataFrame([[k, v] for k, v in d.items()], columns=['feature', 'importance'])

    def plot_importance(self, df:pd.DataFrame, limit=20, asc=False, **kwargs):
        "Plot importance with an optional limit to how many variables are shown"
        df_copy = df.copy()
        df_copy['feature'] = df_copy['feature'].str.slice(0, 25)
        df_copy = df_copy.sort_values(by='importance', ascending=asc)[:limit].sort_values(by='importance', ascending=not asc)
        ax = df_copy.plot.barh(x='feature', y='importance', **kwargs)
        for p in ax.patches:
            ax.annotate(f'{p.get_width():.4f}', (p.get_width() * 1.005, p.get_y() * 1.005))
imp = PermutationImportance(learn)
From this, the most important fields are PropertyField37, PersonalField2, PersonalField1, and SalesField5.
import xgboost as xgb
n_estimators = 100
max_depth = 8
learning_rate = 0.1
subsample = 0.5
X_train, y_train = to.train.xs, to.train.ys.values.ravel()
X_valid, y_valid = to.valid.xs, to.valid.ys.values.ravel()
model = xgb.XGBClassifier(n_estimators=n_estimators, max_depth=max_depth, learning_rate=learning_rate, subsample=subsample)
xgb_model = model.fit(X_train, y_train)
xgb_preds = xgb_model.predict_proba(X_valid)
xgb_preds
accuracy(tensor(xgb_preds), tensor(y_valid))
from xgboost import plot_importance
plot_importance(xgb_model, height=1, max_num_features=20)
From this, the most important fields were SalesField1A, PersonalField9, Original_Quote_Elapsed, PersonalField10A, PersonalField10B, and PropertyField37.
avgs = (preds + xgb_preds) / 2
avgs
argmax = avgs.argmax(dim=1)
argmax
y_valid
accuracy(tensor(preds), tensor(y_valid))
accuracy(tensor(xgb_preds), tensor(y_valid))
accuracy(tensor(avgs), tensor(y_valid))
So we get slightly better performance by ensembling these two models.
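Rather than a fixed 50/50 blend, we could also scan the blend weight on the validation set. A small sketch of that idea (not in the original notebook; note that tuning the weight on the same validation data risks overfitting to it):

# Scan blend weights between the neural net and XGBoost predictions
best_w, best_acc = 0.5, 0.
for i in range(21):
    w = i / 20
    blended = w * preds + (1 - w) * tensor(xgb_preds)
    acc = accuracy(blended, tensor(y_valid)).item()
    if acc > best_acc: best_w, best_acc = w, acc
best_w, best_acc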
from sklearn.ensemble import RandomForestClassifier
tree = RandomForestClassifier(n_estimators=100)
tree.fit(X_train, y_train)
!pip install rfpimp
from rfpimp import *
impTree = importances(tree, X_valid, to.valid.ys)
plot_importances(impTree)
So here the most important fields are PropertyField37, Field7, PersonalField1, SalesField5, PersonalField9, and PersonalField2.
forest_preds = tree.predict_proba(X_valid)
forest_preds
accuracy(tensor(forest_preds), tensor(y_valid))
new_avgs = (preds + xgb_preds + forest_preds) / 3
accuracy(tensor(new_avgs), tensor(y_valid))
So it gets slightly worse when we add Random Forest to the ensemble.
The next step will be to apply the models to the Kaggle test set and try some submissions to see how they score.
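As a sketch of what a submission for the fastai model might look like (assuming the standard QuoteNumber / QuoteConversion_Flag submission format, and remembering that df_test was already given the same add_datepart treatment as the training data):

# Build a test dataloader that re-applies the training-time procs to df_test
test_dl = learn.dls.test_dl(df_test)
test_preds, _ = learn.get_preds(dl=test_dl)
# Column 1 holds the predicted probability of conversion
sub = pd.DataFrame({'QuoteNumber': df_test.index,
                    'QuoteConversion_Flag': test_preds[:, 1].numpy()})
sub.to_csv(path/'submission.csv', index=False)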