Default Pipeline
This notebook shows how to use aikit to directly obtain a default pipeline that you can fit on your data.
[1]:
# Load the Titanic dataset through aikit's dataset helper. The call is unpacked
# into five values; only the first two are used by the rest of this notebook
# (presumably the training features and the training target -- confirm against
# the aikit documentation). The remaining three are discarded.
from aikit.datasets.datasets import DatasetEnum, load_dataset

Xtrain, y_train, _, _, _ = load_dataset(DatasetEnum.titanic)
[2]:
# Ask aikit for a default pipeline built from the data loaded in the previous cell.
from aikit.ml_machine import get_default_pipeline
# The pipeline is only constructed here; `fit` is called in a later cell.
model = get_default_pipeline(Xtrain, y_train)
# Bare last expression so the notebook displays the pipeline's repr
# (a GraphPipeline, as shown in the cell output).
model
Matplotlib won't work
C:\HOMEWARE\Anaconda3-Windows-x86_64\lib\site-packages\gensim\utils.py:1197: UserWarning: detected Windows; aliasing chunkize to chunkize_serial
warnings.warn("detected Windows; aliasing chunkize to chunkize_serial")
[2]:
GraphPipeline(edges=[('ColumnsSelector', 'NumImputer'),
('CountVectorizerWrapper', 'NumImputer'),
('NumericalEncoder', 'NumImputer',
'RandomForestClassifier')],
models={'ColumnsSelector': ColumnsSelector(columns_to_drop=None,
columns_to_use=['pclass',
'age',
'sibsp',
'parch',
'fare',
'body'],
raise_if_shape_differs=True,
regex_match=False),
'CountVectorizerWrapper'...
'RandomForestClassifier': RandomForestClassifier(bootstrap=True,
class_weight=None,
criterion='gini',
max_depth=None,
max_features='auto',
max_leaf_nodes=None,
min_impurity_decrease=0.0,
min_impurity_split=None,
min_samples_leaf=1,
min_samples_split=2,
min_weight_fraction_leaf=0.0,
n_estimators=100,
n_jobs=None,
oob_score=False,
random_state=123,
verbose=0,
warm_start=False)},
no_concat_nodes=None, verbose=False)
[3]:
# Display the pipeline's `graphviz` attribute (presumably a rendered drawing of
# the pipeline's nodes and edges -- the output is not included in this export).
model.graphviz
[3]:
[4]:
# Fit the default pipeline on the training data. The cell output is the repr of
# the value returned by `fit`, which is displayed as a GraphPipeline.
model.fit(Xtrain, y_train)
[4]:
GraphPipeline(edges=[('ColumnsSelector', 'NumImputer'),
('CountVectorizerWrapper', 'NumImputer'),
('NumericalEncoder', 'NumImputer',
'RandomForestClassifier')],
models={'ColumnsSelector': ColumnsSelector(columns_to_drop=None,
columns_to_use=['pclass',
'age',
'sibsp',
'parch',
'fare',
'body'],
raise_if_shape_differs=True,
regex_match=False),
'CountVectorizerWrapper'...
'RandomForestClassifier': RandomForestClassifier(bootstrap=True,
class_weight=None,
criterion='gini',
max_depth=None,
max_features='auto',
max_leaf_nodes=None,
min_impurity_decrease=0.0,
min_impurity_split=None,
min_samples_leaf=1,
min_samples_split=2,
min_weight_fraction_leaf=0.0,
n_estimators=100,
n_jobs=None,
oob_score=False,
random_state=123,
verbose=0,
warm_start=False)},
no_concat_nodes=None, verbose=False)
[ ]: