<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Integrating Data from Microsoft Fabric's Lakehouse with Neo4j Graph Database in Data Engineering</title>
    <link>https://community.fabric.microsoft.com/t5/Data-Engineering/Integrating-Data-from-Microsoft-Fabric-s-Lakehouse-with-Neo4j/m-p/4386676#M6676</link>
    <description>&lt;P&gt;Hello&amp;nbsp;&lt;a href="https://community.fabric.microsoft.com/t5/user/viewprofilepage/user-id/408321"&gt;@syl-ade&lt;/a&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;why is the spark session created 3 times?&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;I know it might not make a lot of difference, but still asking&amp;nbsp;&lt;/P&gt;</description>
    <pubDate>Wed, 29 Jan 2025 16:57:30 GMT</pubDate>
    <dc:creator>nilendraFabric</dc:creator>
    <dc:date>2025-01-29T16:57:30Z</dc:date>
    <item>
      <title>Integrating Data from Microsoft Fabric's Lakehouse with Neo4j Graph Database</title>
      <link>https://community.fabric.microsoft.com/t5/Data-Engineering/Integrating-Data-from-Microsoft-Fabric-s-Lakehouse-with-Neo4j/m-p/4386217#M6668</link>
      <description>&lt;P&gt;Hello,&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;I've been unsuccessfully trying to integrate data&amp;nbsp;from Microsoft Fabric's Lakehouse with Neo4j Graph Database.&amp;nbsp;&lt;/P&gt;&lt;P&gt;I have run these code snippets:&amp;nbsp;&lt;A href="https://github.com/neo4j-partners/neo4j-microsoft-fabric" target="_blank" rel="noopener"&gt;neo4j-partners/neo4j-microsoft-fabric&lt;/A&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;My code looks like this:&lt;/P&gt;&lt;P&gt;cell 1&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;LI-CODE lang="python"&gt;%%pyspark
from pyspark.sql import SparkSession

spark = SparkSession.builder \
    .appName("Neo4jSparkConnector") \
    .config("spark.jars.packages", "org.neo4j:neo4j-connector-apache-spark_2.12:5.3.2_for_spark_3") \
    .getOrCreate()&lt;/LI-CODE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;cell 2&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;LI-CODE lang="python"&gt;%%spark
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder()
  .appName("Neo4j Notebook")
  .config("spark.jars.packages", "org.neo4j:neo4j-connector-apache-spark_2.12:4.0.0_for_spark_3")
  .getOrCreate()&lt;/LI-CODE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;result:&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;LI-CODE lang="markup"&gt;import org.apache.spark.sql.SparkSession

spark: org.apache.spark.sql.SparkSession = org.apache.spark.sql.SparkSession@7c037b94&lt;/LI-CODE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;cell 3&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;LI-CODE lang="python"&gt;%%spark

// Set base path
val absfss_Base_Path = "abfss://Neo4j_Workspace1@onelake.dfs.fabric.microsoft.com/Northwind_Lakehouse.Lakehouse/Files/Northwind/"

// Import required libraries
import org.apache.spark.sql.{DataFrame, SaveMode, SparkSession}
import com.fasterxml.jackson.databind.ObjectMapper
import com.fasterxml.jackson.module.scala.DefaultScalaModule
import com.fasterxml.jackson.module.scala.experimental.ScalaObjectMapper

// Create Spark session
val spark = SparkSession.builder().appName("Neo4j Notebook").getOrCreate()&lt;/LI-CODE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;result:&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;LI-CODE lang="markup"&gt;absfss_Base_Path: String = abfss://Neo4j_Workspace1@onelake.dfs.fabric.microsoft.com/Northwind_Lakehouse.Lakehouse/Files/Northwind/
import org.apache.spark.sql.{DataFrame, SaveMode, SparkSession}
import com.fasterxml.jackson.databind.ObjectMapper
import com.fasterxml.jackson.module.scala.DefaultScalaModule
import com.fasterxml.jackson.module.scala.experimental.ScalaObjectMapper
spark: org.apache.spark.sql.SparkSession = org.apache.spark.sql.SparkSession@5dfa3492&lt;/LI-CODE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;cell 4&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;LI-CODE lang="python"&gt;%%spark
// Read Northwind data files
val customerDF = spark.read.option("header", true).csv(absfss_Base_Path + "customers.csv")
val supplierDF = spark.read.option("header", true).csv(absfss_Base_Path + "suppliers.csv")
val stagedOrderDF = spark.read.option("header", true).csv(absfss_Base_Path + "orders.csv")
  .withColumn("addressID", concat_ws(", ", col("shipName"), col("shipAddress"), 
  col("shipCity"), col("shipRegion"), col("shipPostalCode"), col("shipCountry")))
val orderDetailDF = spark.read.option("header", true).csv(absfss_Base_Path + "order-details.csv")
val productDF = spark.read.option("header", true).csv(absfss_Base_Path + "products.csv")
val categoryDF = spark.read.option("header", true).csv(absfss_Base_Path + "categories.csv")

//create separate addressesDF and finalize orderDF
val addressDF = stagedOrderDF
 .select($"addressID", 
    $"shipName".alias("name"), 
    $"shipAddress".alias("address"), 
    $"shipCity".alias("city"), 
    $"shipRegion".alias("region"), 
    $"shipPostalCode".alias("postalCode"), 
    $"shipCountry".alias("country"))
 .dropDuplicates("addressID")
val orderDF = stagedOrderDF.drop("shipName","shipAddress", "shipCity", "shipRegion", "shipPostalCode", "shipCountry")&lt;/LI-CODE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;result:&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;LI-CODE lang="markup"&gt;customerDF: org.apache.spark.sql.DataFrame = [customerID: string, companyName: string ... 10 more fields]
supplierDF: org.apache.spark.sql.DataFrame = [supplierID: string, companyName: string ... 10 more fields]
stagedOrderDF: org.apache.spark.sql.DataFrame = [orderID: string, customerID: string ... 13 more fields]
orderDetailDF: org.apache.spark.sql.DataFrame = [orderID: string, productID: string ... 3 more fields]
productDF: org.apache.spark.sql.DataFrame = [productID: string, productName: string ... 8 more fields]
categoryDF: org.apache.spark.sql.DataFrame = [categoryID: string, categoryName: string ... 2 more fields]
addressDF: org.apache.spark.sql.Dataset[org.apache.spark.sql.Row] = [addressID: string, name: string ... 5 more fields]
orderDF: org.apache.spark.sql.DataFrame = [orderID: string, customerID: string ... 7 more fields]&lt;/LI-CODE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;cell 5&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;LI-CODE lang="python"&gt;%%spark
import org.apache.spark.sql.{DataFrame, SaveMode, SparkSession}

val spark = SparkSession.builder()
  .appName("Neo4j Notebook")
  .config("spark.jars.packages", "org.neo4j:neo4j-connector-apache-spark_2.12:5.2.3_for_spark_3")
  .getOrCreate()&lt;/LI-CODE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;result:&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;LI-CODE lang="markup"&gt;import org.apache.spark.sql.{DataFrame, SaveMode, SparkSession}
spark: org.apache.spark.sql.SparkSession = org.apache.spark.sql.SparkSession@5dfa3492&lt;/LI-CODE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;cell 6&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;LI-CODE lang="python"&gt;%%spark
// Load JSON file for Neo4j credentials
val jsonString = spark.read.text(absfss_Base_Path + "neo4j-conn.json").as[String].collect().mkString("\n")

// Parse JSON string
val mapper = new ObjectMapper() with ScalaObjectMapper
mapper.registerModule(DefaultScalaModule)
val data = mapper.readValue[Map[String, Any]](jsonString)

// Extract Neo4j connection details
val neo4jUrl = data("NEO4J_URL").asInstanceOf[String]
val neo4jUsername = data("NEO4J_USERNAME").asInstanceOf[String]
val neo4jPassword = data("NEO4J_PASSWORD").asInstanceOf[String]&lt;/LI-CODE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;result:&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;LI-CODE lang="markup"&gt;jsonString: String =
"

{
    "NEO4J_URL": "neo4j+s://80c19ba0.databases.neo4j.io",
    "NEO4J_USERNAME": "neo4j",
    "NEO4J_PASSWORD": "**MY PASSWORD**",
    "AURA_INSTANCEID": "80c19ba0",
    "AURA_INSTANCENAME": "Instance01"
  }"
warning: one deprecation (since 2.12.1); for details, enable `:setting -deprecation' or `:replay -deprecation'
mapper: com.fasterxml.jackson.databind.ObjectMapper with com.fasterxml.jackson.module.scala.experimental.ScalaObjectMapper = $default$anon$1@d83795a
res23: com.fasterxml.jackson.databind.ObjectMapper = $default$anon$1@d83795a
data: Map[String,Any] = Map(NEO4J_PASSWORD -&amp;gt; **MY PASSWORD**, NEO4J_URL -&amp;gt; neo4j+s://80c19ba0.databases.neo4j.io, NEO4J_USERNAME -&amp;gt; neo4j, AURA_INSTANCEID -&amp;gt; 80c19ba0, AURA_INSTANCENAME -&amp;gt; Instance01)
neo4jUrl: String = neo4j+s://80c19ba0.databases.neo4j.io
neo4jUsername: String = neo4j
neo4jPassword: String = **MY PASSWORD**&lt;/LI-CODE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;cell 7&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;LI-CODE lang="python"&gt;%%spark
import org.apache.spark.sql.{DataFrame, SaveMode}

// Write nodes to Neo4j
def writeNodesToNeo4j(dataFrame: DataFrame, label: String, nodeKey: String): Unit = {
  dataFrame.write.format("org.neo4j.spark.DataSource")
    .mode(SaveMode.Overwrite)
    .option("url", neo4jUrl)
    .option("authentication.basic.username", neo4jUsername)
    .option("authentication.basic.password", neo4jPassword)
    .option("labels", label)
    .option("node.keys", nodeKey)
    .option("schema.optimization.node.keys", "KEY") //create node key constraints under the hood
    .save()
}

writeNodesToNeo4j(customerDF, "Customer", "customerID")
writeNodesToNeo4j(supplierDF, "Supplier", "supplierID")
writeNodesToNeo4j(orderDF, "Order", "orderID")
writeNodesToNeo4j(productDF, "Product", "productID")
writeNodesToNeo4j(categoryDF, "Category", "categoryID")
writeNodesToNeo4j(addressDF, "Address", "addressID")&lt;/LI-CODE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;result:&lt;/P&gt;&lt;P&gt;Diagnostics: Spark_User_UserApp_ClassNotFound&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;LI-CODE lang="markup"&gt;org.apache.spark.SparkClassNotFoundException: [DATA_SOURCE_NOT_FOUND] Failed to find the data source: org.neo4j.spark.DataSource. Please find packages at `https://spark.apache.org/third-party-projects.html`.
  at org.apache.spark.sql.errors.QueryExecutionErrors$.dataSourceNotFoundError(QueryExecutionErrors.scala:724)

  at org.apache.spark.sql.execution.datasources.DataSource$.lookupDataSource(DataSource.scala:650)

  at org.apache.spark.sql.execution.datasources.DataSource$.lookupDataSourceV2(DataSource.scala:700)

  at org.apache.spark.sql.DataFrameWriter.lookupV2Provider(DataFrameWriter.scala:909)

  at org.apache.spark.sql.DataFrameWriter.saveInternal(DataFrameWriter.scala:276)

  at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:250)

  at $defaultwriteNodesToNeo4j(&amp;lt;console&amp;gt;:62)

  ... 64 elided

Caused by: java.lang.ClassNotFoundException: org.neo4j.spark.DataSource.DefaultSource

  at scala.reflect.internal.util.AbstractFileClassLoader.findClass(AbstractFileClassLoader.scala:72)

  at java.base/java.lang.ClassLoader.loadClass(ClassLoader.java:594)

  at java.base/java.lang.ClassLoader.loadClass(ClassLoader.java:527)

  at org.apache.spark.sql.execution.datasources.DataSource$.$anonfun$lookupDataSource$5(DataSource.scala:636)

  at scala.util.Try$.apply(Try.scala:213)

  at org.apache.spark.sql.execution.datasources.DataSource$.$anonfun$lookupDataSource$4(DataSource.scala:636)

  at scala.util.Failure.orElse(Try.scala:224)

  at org.apache.spark.sql.execution.datasources.DataSource$.lookupDataSource(DataSource.scala:636)

  ... 69 more&lt;/LI-CODE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;The problem shows up on running the 7th cell of code. Any idea how to solve that?&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Wed, 29 Jan 2025 13:18:59 GMT</pubDate>
      <guid>https://community.fabric.microsoft.com/t5/Data-Engineering/Integrating-Data-from-Microsoft-Fabric-s-Lakehouse-with-Neo4j/m-p/4386217#M6668</guid>
      <dc:creator>syl-ade</dc:creator>
      <dc:date>2025-01-29T13:18:59Z</dc:date>
    </item>
    <item>
      <title>Re: Integrating Data from Microsoft Fabric's Lakehouse with Neo4j Graph Database</title>
      <link>https://community.fabric.microsoft.com/t5/Data-Engineering/Integrating-Data-from-Microsoft-Fabric-s-Lakehouse-with-Neo4j/m-p/4386676#M6676</link>
      <description>&lt;P&gt;Hello&amp;nbsp;&lt;a href="https://community.fabric.microsoft.com/t5/user/viewprofilepage/user-id/408321"&gt;@syl-ade&lt;/a&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;why is the spark session created 3 times?&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;I know it might not make a lot of difference, but still asking&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Wed, 29 Jan 2025 16:57:30 GMT</pubDate>
      <guid>https://community.fabric.microsoft.com/t5/Data-Engineering/Integrating-Data-from-Microsoft-Fabric-s-Lakehouse-with-Neo4j/m-p/4386676#M6676</guid>
      <dc:creator>nilendraFabric</dc:creator>
      <dc:date>2025-01-29T16:57:30Z</dc:date>
    </item>
    <item>
      <title>Re: Integrating Data from Microsoft Fabric's Lakehouse with Neo4j Graph Database</title>
      <link>https://community.fabric.microsoft.com/t5/Data-Engineering/Integrating-Data-from-Microsoft-Fabric-s-Lakehouse-with-Neo4j/m-p/4387494#M6691</link>
      <description>&lt;P&gt;Hi&amp;nbsp;&lt;a href="https://community.fabric.microsoft.com/t5/user/viewprofilepage/user-id/408321"&gt;@syl-ade&lt;/a&gt;,&lt;BR /&gt;Thanks for posting your query in Microsoft community forum.&lt;BR /&gt;&lt;BR /&gt;The error message suggests that the Neo4j Spark Connector is not being properly recognized. Please try the following fixes:&lt;/P&gt;
&lt;UL&gt;
&lt;LI&gt;In your code, different cells are using different versions of the Neo4j Spark Connector:&lt;/LI&gt;
&lt;/UL&gt;
&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; Cell 1: 5.3.2_for_spark_3&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; Cell 2: 4.0.0_for_spark_3&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; Cell 5: 5.2.3_for_spark_3&lt;BR /&gt;To prevent conflicts, please update all your Spark session configurations to the latest stable version,&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; 5.3.2_for_spark_3.&lt;BR /&gt;&lt;BR /&gt;&lt;/P&gt;
&lt;UL&gt;
&lt;LI&gt;Kindly update your write function to utilize "org.neo4j.spark" instead of "org.neo4j.spark.DataSource". Please also ensure that all dependencies are properly loaded before executing the write function.&lt;/LI&gt;
&lt;/UL&gt;
&lt;P&gt;If this helps, kindly &lt;STRONG&gt;Accept it as a solution&lt;/STRONG&gt; and give a "&lt;STRONG&gt;Kudos&lt;/STRONG&gt;" so other members can find it more easily.&lt;BR /&gt;Thank you.&lt;/P&gt;</description>
      <pubDate>Thu, 30 Jan 2025 06:38:59 GMT</pubDate>
      <guid>https://community.fabric.microsoft.com/t5/Data-Engineering/Integrating-Data-from-Microsoft-Fabric-s-Lakehouse-with-Neo4j/m-p/4387494#M6691</guid>
      <dc:creator>v-ssriganesh</dc:creator>
      <dc:date>2025-01-30T06:38:59Z</dc:date>
    </item>
    <item>
      <title>Re: Integrating Data from Microsoft Fabric's Lakehouse with Neo4j Graph Database</title>
      <link>https://community.fabric.microsoft.com/t5/Data-Engineering/Integrating-Data-from-Microsoft-Fabric-s-Lakehouse-with-Neo4j/m-p/4387941#M6704</link>
      <description>&lt;P&gt;I tried executing it in different forms and just did not clean it yet. As long as it works it does not bother me while testing the code.&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Thu, 30 Jan 2025 10:18:50 GMT</pubDate>
      <guid>https://community.fabric.microsoft.com/t5/Data-Engineering/Integrating-Data-from-Microsoft-Fabric-s-Lakehouse-with-Neo4j/m-p/4387941#M6704</guid>
      <dc:creator>syl-ade</dc:creator>
      <dc:date>2025-01-30T10:18:50Z</dc:date>
    </item>
    <item>
      <title>Re: Integrating Data from Microsoft Fabric's Lakehouse with Neo4j Graph Database</title>
      <link>https://community.fabric.microsoft.com/t5/Data-Engineering/Integrating-Data-from-Microsoft-Fabric-s-Lakehouse-with-Neo4j/m-p/4387974#M6705</link>
      <description>&lt;P&gt;The solution provided did not help with the problem.&amp;nbsp;&lt;BR /&gt;It started to create nodes with this code.&lt;/P&gt;&lt;LI-CODE lang="python"&gt;%%configure -f
{
  "conf": {
    "spark.jars": "abfss://Neo4j_Workspace1@onelake.dfs.fabric.microsoft.com/Northwind_Lakehouse.Lakehouse/Files/Northwind/neo4j-spark-connector-5.3.1-s_2.12.jar"
  }
}&lt;/LI-CODE&gt;&lt;LI-CODE lang="python"&gt;import org.apache.spark.sql.{SparkSession, SaveMode}
import org.apache.spark.sql.functions._
import com.fasterxml.jackson.databind.ObjectMapper
import com.fasterxml.jackson.module.scala.{DefaultScalaModule, ScalaObjectMapper}
import com.fasterxml.jackson.module.scala.experimental.ScalaObjectMapper


// Set base path
val absfss_Base_Path = "abfss://Neo4j_Workspace1@onelake.dfs.fabric.microsoft.com/Northwind_Lakehouse.Lakehouse/Files/Northwind/"

// Read Northwind data files
val customerDF = spark.read.option("header", true).csv(s"$absfss_Base_Path/customers.csv")
val supplierDF = spark.read.option("header", true).csv(s"$absfss_Base_Path/suppliers.csv")
val stagedOrderDF = spark.read.option("header", true).csv(s"$absfss_Base_Path/orders.csv")
  .withColumn("addressID", concat_ws(", ", col("shipName"), col("shipAddress"), col("shipCity"), col("shipRegion"), col("shipPostalCode"), col("shipCountry")))
val orderDetailDF = spark.read.option("header", true).csv(s"$absfss_Base_Path/order-details.csv")
val productDF = spark.read.option("header", true).csv(s"$absfss_Base_Path/products.csv")
val categoryDF = spark.read.option("header", true).csv(s"$absfss_Base_Path/categories.csv")

// Create separate addressesDF and finalize orderDF
val addressDF = stagedOrderDF
  .select($"addressID", $"shipName".alias("name"), $"shipAddress".alias("address"), $"shipCity".alias("city"), $"shipRegion".alias("region"), $"shipPostalCode".alias("postalCode"), $"shipCountry".alias("country"))
  .dropDuplicates("addressID")

val orderDF = stagedOrderDF.drop("shipName", "shipAddress", "shipCity", "shipRegion", "shipPostalCode", "shipCountry")

// Load JSON file for Neo4j credentials
val jsonString = spark.read.text(s"$absfss_Base_Path/neo4j-conn.json").as[String].collect().mkString("\n")

// Parse JSON string
val mapper = new ObjectMapper() with ScalaObjectMapper
mapper.registerModule(DefaultScalaModule)
val data = mapper.readValue[Map[String, Any]](jsonString)

// Extract Neo4j connection details
val neo4jUrl = data("NEO4J_URL").asInstanceOf[String]
val neo4jUsername = data("NEO4J_USERNAME").asInstanceOf[String]
val neo4jPassword = data("NEO4J_PASSWORD").asInstanceOf[String]

val neo4jOptions = Map(
  "url" -&amp;gt; neo4jUrl,
  "authentication.basic.username" -&amp;gt; neo4jUsername,
  "authentication.basic.password" -&amp;gt; neo4jPassword
)

customerDF.write
  .format("org.neo4j.spark.DataSource")
  .mode(SaveMode.Overwrite)
  .options(neo4jOptions)
  .option("labels", "Customer")
  .option("node.keys", "customerID")
  .save()

supplierDF.write
  .format("org.neo4j.spark.DataSource")
  .mode(SaveMode.Overwrite)
  .options(neo4jOptions)
  .option("labels", "Supplier")
  .option("node.keys", "supplierID")
  .save()

stagedOrderDF.write
  .format("org.neo4j.spark.DataSource")
  .mode(SaveMode.Overwrite)
  .options(neo4jOptions)
  .option("labels", "Order")
  .option("node.keys", "orderID")
  .save()
&lt;/LI-CODE&gt;&lt;P&gt;&lt;BR /&gt;&amp;nbsp;Yet I still do not know how to run this part successfully&lt;BR /&gt;&lt;BR /&gt;&lt;/P&gt;&lt;LI-CODE lang="python"&gt;// Write nodes to Neo4j
def writeNodesToNeo4j(dataFrame: DataFrame, label: String, nodeKey: String): Unit = {
  dataFrame.write.format("org.neo4j.spark")
    .mode(SaveMode.Overwrite)
    .option("url", neo4jUrl)
    .option("authentication.basic.username", neo4jUsername)
    .option("authentication.basic.password", neo4jPassword)
    .option("labels", label)
    .option("node.keys", nodeKey)
    .option("schema.optimization.node.keys", "KEY") //create node key constraints under the hood
    .save()
}

writeNodesToNeo4j(customerDF, "Customer", "customerID")
writeNodesToNeo4j(supplierDF, "Supplier", "supplierID")
writeNodesToNeo4j(orderDF, "Order", "orderID")
writeNodesToNeo4j(productDF, "Product", "productID")
writeNodesToNeo4j(categoryDF, "Category", "categoryID")
writeNodesToNeo4j(addressDF, "Address", "addressID")&lt;/LI-CODE&gt;</description>
      <pubDate>Thu, 30 Jan 2025 10:32:39 GMT</pubDate>
      <guid>https://community.fabric.microsoft.com/t5/Data-Engineering/Integrating-Data-from-Microsoft-Fabric-s-Lakehouse-with-Neo4j/m-p/4387974#M6705</guid>
      <dc:creator>syl-ade</dc:creator>
      <dc:date>2025-01-30T10:32:39Z</dc:date>
    </item>
    <item>
      <title>Re: Integrating Data from Microsoft Fabric's Lakehouse with Neo4j Graph Database</title>
      <link>https://community.fabric.microsoft.com/t5/Data-Engineering/Integrating-Data-from-Microsoft-Fabric-s-Lakehouse-with-Neo4j/m-p/4388606#M6731</link>
      <description>&lt;P&gt;Hi &lt;a href="https://community.fabric.microsoft.com/t5/user/viewprofilepage/user-id/408321"&gt;@syl-ade&lt;/a&gt;,&lt;/P&gt;
&lt;P&gt;Thanks for your update. I'm glad you were able to create nodes successfully using your new approach. However, I see that your function writeNodesToNeo4j is still not working as expected.&lt;/P&gt;
&lt;UL&gt;
&lt;LI&gt;In your function, you are using "org.neo4j.spark" as the format. However, it should be "org.neo4j.spark.DataSource", like in your working version.&lt;/LI&gt;
&lt;LI&gt;Since your working approach included %%configure -f, ensure this configuration is present in the notebook before executing any Spark operations.&lt;/LI&gt;
&lt;LI&gt;Run the %%configure -f block before running the function.&lt;/LI&gt;
&lt;LI&gt;Try adding the following before calling writeNodesToNeo4j to ensure that Spark is recognizing the JAR correctly&lt;/LI&gt;
&lt;/UL&gt;
&lt;P&gt;Let me know if you still face issues! If this helps, please mark it as a solution and give a "Kudos" so other community members can find it easily.&lt;BR /&gt;Thank you.&lt;/P&gt;</description>
      <pubDate>Thu, 30 Jan 2025 17:18:30 GMT</pubDate>
      <guid>https://community.fabric.microsoft.com/t5/Data-Engineering/Integrating-Data-from-Microsoft-Fabric-s-Lakehouse-with-Neo4j/m-p/4388606#M6731</guid>
      <dc:creator>v-ssriganesh</dc:creator>
      <dc:date>2025-01-30T17:18:30Z</dc:date>
    </item>
    <item>
      <title>Re: Integrating Data from Microsoft Fabric's Lakehouse with Neo4j Graph Database</title>
      <link>https://community.fabric.microsoft.com/t5/Data-Engineering/Integrating-Data-from-Microsoft-Fabric-s-Lakehouse-with-Neo4j/m-p/4391915#M6835</link>
      <description>&lt;P&gt;Hi&amp;nbsp;&lt;a href="https://community.fabric.microsoft.com/t5/user/viewprofilepage/user-id/408321"&gt;@syl-ade&lt;/a&gt;,&lt;/P&gt;
&lt;P&gt;May I ask if you have resolved this issue? If so, please mark the helpful reply and accept it as the solution. This will be helpful for other community members who have similar problems to solve it faster.&lt;/P&gt;
&lt;P&gt;Thank you.&lt;/P&gt;
&lt;P&gt;&lt;LI-WRAPPER&gt;&lt;/LI-WRAPPER&gt;&lt;/P&gt;</description>
      <pubDate>Mon, 03 Feb 2025 07:09:29 GMT</pubDate>
      <guid>https://community.fabric.microsoft.com/t5/Data-Engineering/Integrating-Data-from-Microsoft-Fabric-s-Lakehouse-with-Neo4j/m-p/4391915#M6835</guid>
      <dc:creator>v-ssriganesh</dc:creator>
      <dc:date>2025-02-03T07:09:29Z</dc:date>
    </item>
    <item>
      <title>Re: Integrating Data from Microsoft Fabric's Lakehouse with Neo4j Graph Database</title>
      <link>https://community.fabric.microsoft.com/t5/Data-Engineering/Integrating-Data-from-Microsoft-Fabric-s-Lakehouse-with-Neo4j/m-p/4397873#M6990</link>
      <description>&lt;P&gt;&lt;SPAN data-teams="true"&gt;Hi&amp;nbsp;&lt;a href="https://community.fabric.microsoft.com/t5/user/viewprofilepage/user-id/408321"&gt;@syl-ade&lt;/a&gt;,&lt;BR /&gt;I wanted to check if you had the opportunity to review the information provided. Please feel free to contact us if you have any further questions. If my response has addressed your query, please accept it as a solution and give a 'Kudos' so other members can easily find it.&lt;BR /&gt;Thank you.&lt;/SPAN&gt;&lt;/P&gt;</description>
      <pubDate>Thu, 06 Feb 2025 08:59:51 GMT</pubDate>
      <guid>https://community.fabric.microsoft.com/t5/Data-Engineering/Integrating-Data-from-Microsoft-Fabric-s-Lakehouse-with-Neo4j/m-p/4397873#M6990</guid>
      <dc:creator>v-ssriganesh</dc:creator>
      <dc:date>2025-02-06T08:59:51Z</dc:date>
    </item>
    <item>
      <title>Re: Integrating Data from Microsoft Fabric's Lakehouse with Neo4j Graph Database</title>
      <link>https://community.fabric.microsoft.com/t5/Data-Engineering/Integrating-Data-from-Microsoft-Fabric-s-Lakehouse-with-Neo4j/m-p/4401456#M7069</link>
      <description>&lt;P&gt;&lt;SPAN data-teams="true"&gt;Hi&amp;nbsp;&lt;a href="https://community.fabric.microsoft.com/t5/user/viewprofilepage/user-id/408321"&gt;@syl-ade&lt;/a&gt;,&lt;BR /&gt;I hope this information is helpful. Please let me know if you have any further questions or if you'd like to discuss this further. If this answers your question, please &lt;STRONG&gt;Accept it as a solution&lt;/STRONG&gt; and give it a '&lt;STRONG&gt;Kudos&lt;/STRONG&gt;' so others can find it easily.&lt;BR /&gt;Thank you.&lt;/SPAN&gt;&lt;/P&gt;</description>
      <pubDate>Sun, 09 Feb 2025 13:28:17 GMT</pubDate>
      <guid>https://community.fabric.microsoft.com/t5/Data-Engineering/Integrating-Data-from-Microsoft-Fabric-s-Lakehouse-with-Neo4j/m-p/4401456#M7069</guid>
      <dc:creator>v-ssriganesh</dc:creator>
      <dc:date>2025-02-09T13:28:17Z</dc:date>
    </item>
  </channel>
</rss>

