Solved: How to transfer file from SharePoint to Lakehouse ...

DanSago · ‎12-02-2024

How can I transfer a file from SharePoint to my Lakehouse files folder using a Python notebook?

I am able to connect to SharePoint.

# Build an authentication context for the SharePoint site at `url`.
ctx_auth = AuthenticationContext(url)
# Continue only when a token is acquired for the given user credentials.
# There is no else branch, so a falsy result is silently ignored.
if ctx_auth.acquire_token_for_user(username, password):
  # Client context created from the same site URL and the authenticated context.
  ctx = ClientContext(url, ctx_auth)
  web = ctx.web
  # NOTE(review): load() presumably only queues the request and
  # execute_query() actually sends it -- confirm against the library docs.
  ctx.load(web)
  ctx.execute_query()
  print("Authentication successful")

The code above is working. The part I am missing is how to copy the file specified in the URL from SharePoint to the Lakehouse. Once I connect to the file, how do I save it to "lakehouse\files\source_files"?

DanSago · ‎12-03-2024

Building on yakush's answer, I found the solution. The key change was in Step 3, which enables transferring the SharePoint file to the Microsoft Fabric Lakehouse.

from office365.sharepoint.client_context import ClientContext
from office365.runtime.auth.authentication_context import AuthenticationContext
import os
import shutil

# Copy one file from SharePoint into the Files area of a Fabric Lakehouse.
#
# Flow: authenticate -> download to a temporary local file -> mount the
# Lakehouse -> copy the file into Files/source_files -> delete the temp file.

# SharePoint credentials
# NOTE(review): these are placeholders; avoid committing real credentials in
# a notebook -- read them from a secret store instead.
url = "https://your_sharepoint_url/sites/your_site"
username = "your_username"
password = "your_password"
file_url = "/sites/your_site/Shared Documents/your_file.xlsx"  # Update with the file's path
local_file_path = "your_file.xlsx"  # Temporary download location

# Step 1: Authenticate and connect to SharePoint
ctx_auth = AuthenticationContext(url)
if not ctx_auth.acquire_token_for_user(username, password):
    print("Authentication failed")
    # Raise SystemExit directly instead of calling exit(): exit() is a helper
    # injected by the `site` module for interactive use and is not guaranteed
    # to exist. A non-zero status also signals the failure to the caller.
    raise SystemExit(1)
ctx = ClientContext(url, ctx_auth)
print("Authentication successful")

try:
    # Step 2: Download the file from SharePoint
    file = ctx.web.get_file_by_server_relative_url(file_url)
    with open(local_file_path, "wb") as local_file:
        file.download(local_file).execute_query()
    # Report success only after the handle is closed and the data is flushed.
    print(f"Downloaded file to {local_file_path}")

    # Step 3: Upload the file to the Lakehouse
    # Mount the Lakehouse for direct file system access.
    # NOTE(review): mssparkutils is not imported in this script; it is
    # presumably provided by the notebook runtime -- confirm.
    lh = "lh_Eerie"  # This is the lakehouse name
    lakehouse = mssparkutils.lakehouse.get(lh)
    mssparkutils.fs.mount(lakehouse.get("properties").get("abfsPath"), f"/{lh}")

    # Local file-system path under which the mounted Lakehouse is reachable
    local_path = mssparkutils.fs.getMountPath(f"/{lh}")

    # shutil.copy into a directory requires that directory to exist, so
    # create the target folder first (no-op when it is already there).
    target_dir = f"{local_path}/Files/source_files/"
    os.makedirs(target_dir, exist_ok=True)
    shutil.copy(local_file_path, target_dir)
finally:
    # Step 4: Clean up the local file
    # Runs even when download/mount/copy raised, so no temp file is leaked.
    try:
        os.remove(local_file_path)
    except FileNotFoundError:
        # The download never created the file; nothing to clean up.
        pass
    else:
        print("Temporary local file deleted.")

View solution in original post

DanSago · ‎12-03-2024

Building on yakush's answer, I found the solution. The key change was in Step 3, which enables transferring the SharePoint file to the Microsoft Fabric Lakehouse.

from office365.sharepoint.client_context import ClientContext
from office365.runtime.auth.authentication_context import AuthenticationContext
import os
import shutil

# Copy one file from SharePoint into the Files area of a Fabric Lakehouse.
#
# Flow: authenticate -> download to a temporary local file -> mount the
# Lakehouse -> copy the file into Files/source_files -> delete the temp file.

# SharePoint credentials
# NOTE(review): these are placeholders; avoid committing real credentials in
# a notebook -- read them from a secret store instead.
url = "https://your_sharepoint_url/sites/your_site"
username = "your_username"
password = "your_password"
file_url = "/sites/your_site/Shared Documents/your_file.xlsx"  # Update with the file's path
local_file_path = "your_file.xlsx"  # Temporary download location

# Step 1: Authenticate and connect to SharePoint
ctx_auth = AuthenticationContext(url)
if not ctx_auth.acquire_token_for_user(username, password):
    print("Authentication failed")
    # Raise SystemExit directly instead of calling exit(): exit() is a helper
    # injected by the `site` module for interactive use and is not guaranteed
    # to exist. A non-zero status also signals the failure to the caller.
    raise SystemExit(1)
ctx = ClientContext(url, ctx_auth)
print("Authentication successful")

try:
    # Step 2: Download the file from SharePoint
    file = ctx.web.get_file_by_server_relative_url(file_url)
    with open(local_file_path, "wb") as local_file:
        file.download(local_file).execute_query()
    # Report success only after the handle is closed and the data is flushed.
    print(f"Downloaded file to {local_file_path}")

    # Step 3: Upload the file to the Lakehouse
    # Mount the Lakehouse for direct file system access.
    # NOTE(review): mssparkutils is not imported in this script; it is
    # presumably provided by the notebook runtime -- confirm.
    lh = "lh_Eerie"  # This is the lakehouse name
    lakehouse = mssparkutils.lakehouse.get(lh)
    mssparkutils.fs.mount(lakehouse.get("properties").get("abfsPath"), f"/{lh}")

    # Local file-system path under which the mounted Lakehouse is reachable
    local_path = mssparkutils.fs.getMountPath(f"/{lh}")

    # shutil.copy into a directory requires that directory to exist, so
    # create the target folder first (no-op when it is already there).
    target_dir = f"{local_path}/Files/source_files/"
    os.makedirs(target_dir, exist_ok=True)
    shutil.copy(local_file_path, target_dir)
finally:
    # Step 4: Clean up the local file
    # Runs even when download/mount/copy raised, so no temp file is leaked.
    try:
        os.remove(local_file_path)
    except FileNotFoundError:
        # The download never created the file; nothing to clean up.
        pass
    else:
        print("Temporary local file deleted.")