Join us at FabCon Atlanta from March 16 - 20, 2026, for the ultimate Fabric, Power BI, AI and SQL community-led event. Save $200 with code FABCOMM.
Register now! The Power BI Data Visualization World Championships is back! Get ahead of the game and start preparing now! Learn more
Hi guys,
I'm fairly new to Python, but my code works in Spyder and in a Jupyter notebook.
The equivalent script I'm running in Power BI is the following:
import pandas as pd
import numpy as np
from sklearn import metrics
from sklearn.model_selection import StratifiedShuffleSplit,StratifiedKFold
from sklearn.model_selection import LeavePGroupsOut
from sklearn.model_selection import GroupKFold
from sklearn.pipeline import Pipeline
import random
from sklearn import tree
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn import metrics
# Load the CSV extracts and build `merged_`: one row per product line of a
# 'train' order, enriched with the attributes of the order it belongs to.

# Folder that holds every CSV file read below.
DATA_DIR = 'C:/Users/morgan.quezel-ambrun/Documents/Stage_MBA'

# Explicit dtypes are passed for every column of every file.
# Text columns use the builtin `str`: the `np.str` alias was deprecated in
# NumPy 1.20 and removed in NumPy 1.24, where it raises AttributeError.
df_order_t = pd.read_csv(
    DATA_DIR + '/order_products_train.csv',
    dtype={
        'order_id': np.int32,
        'product_id': np.uint16,
        'add_to_cart_order': np.int16,
        'reordered': np.int8,
    },
    engine='python',
)

products = pd.read_csv(
    DATA_DIR + '/products.csv',
    dtype={
        'product_id': np.uint16,
        'product_name': str,
        'aisle_id': np.uint8,
        'department_id': np.uint8,
    },
)

orders = pd.read_csv(
    DATA_DIR + '/orders.csv',
    dtype={
        'order_id': np.int32,
        'user_id': np.int32,
        'eval_set': 'category',
        'order_number': np.int16,
        'order_dow': np.int8,
        'order_hour_of_day': np.int8,
        'days_since_prior_order': np.float32,
    },
)

aisles = pd.read_csv(
    DATA_DIR + '/aisles.csv',
    dtype={
        'aisle_id': np.uint8,
        'aisle': str,
    },
)

departments = pd.read_csv(
    DATA_DIR + '/departments.csv',
    dtype={
        'department_id': np.uint8,
        'department': str,
    },
)

# Keep only the orders flagged as 'train', then attach their attributes to
# each product line. The left join keeps every row of `df_order_t`.
orders_train = orders.loc[orders['eval_set'] == 'train']
merged_ = pd.merge(df_order_t, orders_train, on='order_id', how='left')

# `eval_set` is constant after the filter and `order_id` was only the join
# key, so both are dropped from the merged table.
merged_ = merged_.drop(columns=['eval_set', 'order_id'])
import random

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn import metrics
from sklearn import tree
from sklearn.model_selection import GroupKFold
from sklearn.model_selection import LeavePGroupsOut
from sklearn.model_selection import StratifiedShuffleSplit, StratifiedKFold
from sklearn.pipeline import Pipeline

# The following code to create a dataframe and remove duplicated rows is always executed and acts as a preamble for your script:
# dataset = pandas.DataFrame(add_to_cart_order, days_since_prior_order, order_dow, order_hour_of_day, order_number, product_id, reordered, user_id)
# dataset = dataset.drop_duplicates()
# Paste or type your script code here:

# Purpose: train decision trees of increasing depth on one group-wise split
# of `dataset` and plot training vs. held-out accuracy against max depth.
#
# Columns are selected BY NAME. The earlier version took `dataset.values`
# and relabelled the columns with a hard-coded order (product_id,
# add_to_cart_order, reordered, user_id, ...). The preamble above lists the
# columns in a different (alphabetical) order, so the labels landed on the
# wrong data, the label-based drops removed the wrong columns, and the real
# `reordered` target stayed among the features.
FEATURE_COLUMNS = [
    'add_to_cart_order',
    'order_number',
    'order_dow',
    'order_hour_of_day',
    'days_since_prior_order',
]
TARGET_COLUMN = 'reordered'
GROUP_COLUMN = 'user_id'

# Features are cast to int, as the earlier version did for the whole matrix.
features = dataset[FEATURE_COLUMNS].astype(int)
target = dataset[TARGET_COLUMN].values
groups = dataset[GROUP_COLUMN].values

# GroupKFold splits by `user_id`, so no user appears on both sides of a split.
group_kfold = GroupKFold(n_splits=5)

# NOTE(review): the indentation of the original loop was lost in the post.
# This assumes only the train/test assignments were inside the loop, i.e.
# every fold overwrote the previous one and the LAST fold was the one used
# below -- confirm against the original notebook.
train_index, test_index = list(group_kfold.split(features, target, groups))[-1]
X_train, X_test = features.iloc[train_index], features.iloc[test_index]
y_train, y_test = target[train_index], target[test_index]

max_depths = list(range(1, 21))
training_accuracies = []
testing_accuracies = []
for max_depth in max_depths:
    dtc = tree.DecisionTreeClassifier(
        criterion="entropy",
        max_depth=max_depth,
        min_samples_split=0.05,
    )
    dtc.fit(X_train, y_train)
    training_accuracies.append(dtc.score(X_train, y_train))
    testing_accuracies.append(dtc.score(X_test, y_test))

plt.plot(max_depths, training_accuracies, max_depths, testing_accuracies, linewidth=2.0)
plt.title('Accuracy Score', fontsize=20)
plt.xlabel("Max depth", fontsize=20)
plt.ylabel("Accuracy", fontsize=20)
# Legend entries follow plot order: training curve first, held-out second.
plt.legend(['Train set', 'Validation set'], loc='upper right')
# Axis limits must be set before show(); curves outside 0.58-0.70 on the
# y axis will not be visible with these limits.
plt.axis([0, 21, 0.58, 0.7])
plt.show()

# Poster's note about the earlier version of this script:
# "My output turns to be empty".
# Likely cause (to be confirmed): with the target leaked into the features
# the accuracies fall outside the fixed y-axis range above.
The Power BI Data Visualization World Championships is back! Get ahead of the game and start preparing now!
| User | Count |
|---|---|
| 3 | |
| 3 | |
| 2 | |
| 2 | |
| 1 |
| User | Count |
|---|---|
| 4 | |
| 4 | |
| 4 | |
| 3 | |
| 3 |