examples/materialization/model_training.py - hamilton - Git at Google

 from typing import Dict

 import numpy as np
 from sklearn import base, linear_model, metrics, svm
 from sklearn.model_selection import train_test_split

 from hamilton import function_modifiers


 @function_modifiers.config.when(clf="svm")
 def prefit_clf__svm(gamma: float = 0.001) -> base.ClassifierMixin:
     """Build an SVM classifier that has not been fitted yet.

     Registered under ``config.when(clf="svm")``.

     :param gamma: Value forwarded to ``svm.SVC`` as its ``gamma`` argument.
     :return: A new ``svm.SVC`` instance on which ``fit`` has not been called.
     """
     classifier = svm.SVC(gamma=gamma)
     return classifier


 @function_modifiers.config.when(clf="logistic")
 def prefit_clf__logreg(penalty: str) -> base.ClassifierMixin:
     """Build a logistic-regression classifier that has not been fitted yet.

     Registered under ``config.when(clf="logistic")``.

     :param penalty: Passed as the first positional argument of
         ``linear_model.LogisticRegression``.
     :return: A new ``LogisticRegression`` instance on which ``fit`` has not been called.
     """
     classifier = linear_model.LogisticRegression(penalty)
     return classifier


 @function_modifiers.extract_fields(
     {"X_train": np.ndarray, "X_test": np.ndarray, "y_train": np.ndarray, "y_test": np.ndarray}
 )
 def train_test_split_func(
     feature_matrix: np.ndarray,
     target: np.ndarray,
     test_size_fraction: float,
     shuffle_train_test_split: bool,
 ) -> Dict[str, np.ndarray]:
     """Split the features and the target into training and test portions.

     The keys of the returned dictionary are the same four field names that
     are declared in the ``extract_fields`` decorator on this function.

     :param feature_matrix: Feature array handed to ``train_test_split``.
     :param target: Target array handed to ``train_test_split``.
     :param test_size_fraction: Passed through as ``test_size``.
     :param shuffle_train_test_split: Passed through as ``shuffle``.
     :return: Dict with keys ``X_train``, ``X_test``, ``y_train`` and ``y_test``.
     """
     splits = train_test_split(
         feature_matrix,
         target,
         test_size=test_size_fraction,
         shuffle=shuffle_train_test_split,
     )
     # Two input arrays yield exactly four pieces, in this order.
     X_train, X_test, y_train, y_test = splits
     return dict(X_train=X_train, X_test=X_test, y_train=y_train, y_test=y_test)


 def y_test_with_labels(y_test: np.ndarray, target_names: np.ndarray) -> np.ndarray:
     """Translate every entry of ``y_test`` into its entry in ``target_names``.

     :param y_test: Values used as indices into ``target_names``.
     :param target_names: Lookup of names, indexed by the values in ``y_test``.
     :return: Array holding ``target_names[idx]`` for each ``idx`` in ``y_test``.
     """
     labels = []
     for label_index in y_test:
         labels.append(target_names[label_index])
     return np.array(labels)


 def fit_clf(
     prefit_clf: base.ClassifierMixin, X_train: np.ndarray, y_train: np.ndarray
 ) -> base.ClassifierMixin:
     """Train the classifier on the training split and hand the same object back.

     ``fit`` is called on ``prefit_clf`` itself, and that very object (not a
     copy) is returned.

     :param prefit_clf: Classifier object exposing ``fit``.
     :param X_train: First argument given to ``fit``.
     :param y_train: Second argument given to ``fit``.
     :return: ``prefit_clf`` after ``fit`` has been called on it.
     """
     fitted = prefit_clf
     fitted.fit(X_train, y_train)
     return fitted


 def predicted_output(fit_clf: base.ClassifierMixin, X_test: np.ndarray) -> np.ndarray:
     """Run the fitted classifier's ``predict`` on the test features.

     :param fit_clf: Classifier object exposing ``predict``.
     :param X_test: Argument given to ``predict``.
     :return: Whatever ``fit_clf.predict(X_test)`` returns.
     """
     predictions = fit_clf.predict(X_test)
     return predictions


 def predicted_output_with_labels(
     predicted_output: np.ndarray, target_names: np.ndarray
 ) -> np.ndarray:
     """Translate every prediction into its entry in ``target_names``.

     :param predicted_output: Values used as indices into ``target_names``.
     :param target_names: Lookup of names, indexed by the predicted values.
     :return: Array holding ``target_names[idx]`` for each predicted ``idx``.
     """
     labels = []
     for predicted_index in predicted_output:
         labels.append(target_names[predicted_index])
     return np.array(labels)


 def classification_report(
     predicted_output_with_labels: np.ndarray, y_test_with_labels: np.ndarray
 ) -> str:
     """Produce a classification report for the labelled predictions.

     The labelled test targets are passed to ``metrics.classification_report``
     first and the labelled predictions second.

     :param predicted_output_with_labels: Predictions, already mapped to labels.
     :param y_test_with_labels: Test targets, already mapped to labels.
     :return: The result of ``metrics.classification_report``.
     """
     report = metrics.classification_report(
         y_test_with_labels,
         predicted_output_with_labels,
     )
     return report


 def confusion_matrix(
     predicted_output_with_labels: np.ndarray, y_test_with_labels: np.ndarray
 ) -> np.ndarray:
     """Returns the confusion matrix for the labelled predictions.

     The labelled test targets are passed to ``metrics.confusion_matrix``
     first and the labelled predictions second. That function returns an
     array rather than text, so the return annotation is ``np.ndarray``
     (it was previously declared as ``str``).

     :param predicted_output_with_labels: Predictions, already mapped to labels.
     :param y_test_with_labels: Test targets, already mapped to labels.
     :return: The result of ``metrics.confusion_matrix``.
     """
     return metrics.confusion_matrix(y_test_with_labels, predicted_output_with_labels)


 def model_parameters(fit_clf: base.ClassifierMixin) -> dict:
     """Fetch the classifier's parameters via its ``get_params`` method.

     :param fit_clf: Classifier object exposing ``get_params``.
     :return: Whatever ``fit_clf.get_params()`` returns.
     """
     params = fit_clf.get_params()
     return params
	from typing import Dict

	import numpy as np
	from sklearn import base, linear_model, metrics, svm
	from sklearn.model_selection import train_test_split

	from hamilton import function_modifiers


	@function_modifiers.config.when(clf="svm")
	def prefit_clf__svm(gamma: float = 0.001) -> base.ClassifierMixin:
	    """Returns an unfitted SVM classifier object.

	    Registered under ``config.when(clf="svm")``.

	    :param gamma: Forwarded to ``svm.SVC`` as ``gamma``; defaults to 0.001.
	    :return: A new ``svm.SVC`` instance that has not been fitted.
	    """
	    return svm.SVC(gamma=gamma)


	@function_modifiers.config.when(clf="logistic")
	def prefit_clf__logreg(penalty: str) -> base.ClassifierMixin:
	    """Returns an unfitted Logistic Regression classifier object.

	    Registered under ``config.when(clf="logistic")``.

	    :param penalty: Forwarded to ``linear_model.LogisticRegression`` as ``penalty``.
	    :return: A new ``LogisticRegression`` instance that has not been fitted.
	    """
	    # Keyword form makes it explicit which constructor argument is set.
	    return linear_model.LogisticRegression(penalty=penalty)


	@function_modifiers.extract_fields(
	    {"X_train": np.ndarray, "X_test": np.ndarray, "y_train": np.ndarray, "y_test": np.ndarray}
	)
	def train_test_split_func(
	    feature_matrix: np.ndarray,
	    target: np.ndarray,
	    test_size_fraction: float,
	    shuffle_train_test_split: bool,
	) -> Dict[str, np.ndarray]:
	    """Function that creates the training & test splits.

	    It is then extracted out into constituent components and used downstream;
	    the returned keys match the fields declared in ``extract_fields``.

	    :param feature_matrix: Feature array handed to ``train_test_split``.
	    :param target: Target array handed to ``train_test_split``.
	    :param test_size_fraction: Passed through as ``test_size``.
	    :param shuffle_train_test_split: Passed through as ``shuffle``.
	    :return: Dict with keys ``X_train``, ``X_test``, ``y_train`` and ``y_test``.
	    """
	    X_train, X_test, y_train, y_test = train_test_split(
	        feature_matrix, target, test_size=test_size_fraction, shuffle=shuffle_train_test_split
	    )
	    return {"X_train": X_train, "X_test": X_test, "y_train": y_train, "y_test": y_test}


	def y_test_with_labels(y_test: np.ndarray, target_names: np.ndarray) -> np.ndarray:
	    """Adds labels to the target output.

	    :param y_test: Values used as indices into ``target_names``.
	    :param target_names: Lookup of names, indexed by the values in ``y_test``.
	    :return: Array holding ``target_names[idx]`` for each ``idx`` in ``y_test``.
	    """
	    return np.array([target_names[idx] for idx in y_test])


	def fit_clf(
	    prefit_clf: base.ClassifierMixin, X_train: np.ndarray, y_train: np.ndarray
	) -> base.ClassifierMixin:
	    """Calls fit on the classifier object; it mutates it.

	    :param prefit_clf: Classifier object exposing ``fit``.
	    :param X_train: First argument given to ``fit``.
	    :param y_train: Second argument given to ``fit``.
	    :return: The same ``prefit_clf`` object, after ``fit`` has been called.
	    """
	    prefit_clf.fit(X_train, y_train)
	    return prefit_clf


	def predicted_output(fit_clf: base.ClassifierMixin, X_test: np.ndarray) -> np.ndarray:
	    """Exercises the fit classifier to perform a prediction.

	    :param fit_clf: Classifier object exposing ``predict``.
	    :param X_test: Argument given to ``predict``.
	    :return: Whatever ``fit_clf.predict(X_test)`` returns.
	    """
	    return fit_clf.predict(X_test)


	def predicted_output_with_labels(
	    predicted_output: np.ndarray, target_names: np.ndarray
	) -> np.ndarray:
	    """Replaces the predictions with the desired labels.

	    :param predicted_output: Values used as indices into ``target_names``.
	    :param target_names: Lookup of names, indexed by the predicted values.
	    :return: Array holding ``target_names[idx]`` for each predicted ``idx``.
	    """
	    return np.array([target_names[idx] for idx in predicted_output])


	def classification_report(
	    predicted_output_with_labels: np.ndarray, y_test_with_labels: np.ndarray
	) -> str:
	    """Returns a classification report.

	    The labelled test targets are passed to ``metrics.classification_report``
	    first and the labelled predictions second.

	    :param predicted_output_with_labels: Predictions, already mapped to labels.
	    :param y_test_with_labels: Test targets, already mapped to labels.
	    :return: The result of ``metrics.classification_report``.
	    """
	    return metrics.classification_report(y_test_with_labels, predicted_output_with_labels)


	def confusion_matrix(
	    predicted_output_with_labels: np.ndarray, y_test_with_labels: np.ndarray
	) -> np.ndarray:
	    """Returns the confusion matrix for the labelled predictions.

	    ``metrics.confusion_matrix`` returns an array rather than text, so the
	    return annotation is ``np.ndarray`` (it was previously declared ``str``).
	    The labelled test targets are passed first, the predictions second.

	    :param predicted_output_with_labels: Predictions, already mapped to labels.
	    :param y_test_with_labels: Test targets, already mapped to labels.
	    :return: The result of ``metrics.confusion_matrix``.
	    """
	    return metrics.confusion_matrix(y_test_with_labels, predicted_output_with_labels)


	def model_parameters(fit_clf: base.ClassifierMixin) -> dict:
	    """Returns a dictionary of model parameters.

	    :param fit_clf: Classifier object exposing ``get_params``.
	    :return: Whatever ``fit_clf.get_params()`` returns.
	    """
	    return fit_clf.get_params()