<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Pydeequ - JavaPackage is not callable in Data Engineering</title>
    <link>https://community.fabric.microsoft.com/t5/Data-Engineering/Pydeequ-JavaPackage-is-not-callable/m-p/4029535#M2881</link>
    <description>&lt;P&gt;Hi,&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;I'm trying to use PyDeequ and I am following the steps here -&amp;nbsp;&lt;A href="https://pydeequ.readthedocs.io/en/latest/README.html#installation" target="_blank"&gt;https://pydeequ.readthedocs.io/en/latest/README.html#installation&lt;/A&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;1.&amp;nbsp;&lt;/P&gt;&lt;LI-CODE lang="python"&gt;pip install pydeequ&lt;/LI-CODE&gt;&lt;P&gt;2.&lt;/P&gt;&lt;LI-CODE lang="python"&gt;import os

# Set the SPARK_VERSION environment variable
os.environ['SPARK_VERSION'] = '3.3'&lt;/LI-CODE&gt;&lt;P&gt;3.&amp;nbsp;&lt;/P&gt;&lt;LI-CODE lang="python"&gt;from pyspark.sql import SparkSession, Row
import pydeequ

spark = (SparkSession
    .builder
    .config("spark.jars.packages", pydeequ.deequ_maven_coord)
    .config("spark.jars.excludes", pydeequ.f2j_maven_coord)
    .getOrCreate())

df = spark.sparkContext.parallelize([
            Row(a="foo", b=1, c=5),
            Row(a="bar", b=2, c=6),
            Row(a="baz", b=3, c=None)]).toDF()&lt;/LI-CODE&gt;&lt;P&gt;4.&amp;nbsp;&lt;/P&gt;&lt;PRE&gt;&lt;SPAN class=""&gt;from &lt;SPAN class=""&gt;pydeequ.analyzers &lt;SPAN class=""&gt;import &lt;SPAN class=""&gt;*
&lt;SPAN class=""&gt;analysisResult &lt;SPAN class=""&gt;= &lt;SPAN class=""&gt;AnalysisRunner&lt;SPAN class=""&gt;(&lt;SPAN class=""&gt;spark&lt;SPAN class=""&gt;) \
                    &lt;SPAN class=""&gt;.&lt;SPAN class=""&gt;onData&lt;SPAN class=""&gt;(&lt;SPAN class=""&gt;df&lt;SPAN class=""&gt;) \
                    &lt;SPAN class=""&gt;.&lt;SPAN class=""&gt;addAnalyzer&lt;SPAN class=""&gt;(&lt;SPAN class=""&gt;Size&lt;SPAN class=""&gt;()) \
                    &lt;SPAN class=""&gt;.&lt;SPAN class=""&gt;addAnalyzer&lt;SPAN class=""&gt;(&lt;SPAN class=""&gt;Completeness&lt;SPAN class=""&gt;(&lt;SPAN class=""&gt;"b"&lt;SPAN class=""&gt;)) \
                    &lt;SPAN class=""&gt;.&lt;SPAN class=""&gt;run&lt;SPAN class=""&gt;()

&lt;SPAN class=""&gt;analysisResult_df &lt;SPAN class=""&gt;= &lt;SPAN class=""&gt;AnalyzerContext&lt;SPAN class=""&gt;.&lt;SPAN class=""&gt;successMetricsAsDataFrame&lt;SPAN class=""&gt;(&lt;SPAN class=""&gt;spark&lt;SPAN class=""&gt;, &lt;SPAN class=""&gt;analysisResult&lt;SPAN class=""&gt;)
&lt;SPAN class=""&gt;analysisResult_df&lt;SPAN class=""&gt;.&lt;SPAN class=""&gt;show&lt;SPAN class=""&gt;()&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/PRE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;I am getting the following error:&lt;/P&gt;&lt;LI-CODE lang="python"&gt;---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
Cell In[53], line 4
      1 from pydeequ.analyzers import *
      3 analysisResult = AnalysisRunner(spark) \
----&amp;gt; 4                     .onData(df) \
      5                     .addAnalyzer(Size()) \
      6                     .addAnalyzer(Completeness("b")) \
      7                     .run()
      9 analysisResult_df = AnalyzerContext.successMetricsAsDataFrame(spark, analysisResult)
     10 analysisResult_df.show()

File ~/cluster-env/trident_env/lib/python3.10/site-packages/pydeequ/analyzers.py:52, in AnalysisRunner.onData(self, df)
     46 """
     47 Starting point to construct an AnalysisRun.
     48 :param dataFrame df: tabular data on which the checks should be verified
     49 :return: new AnalysisRunBuilder object
     50 """
     51 df = ensure_pyspark_df(self._spark_session, df)
---&amp;gt; 52 return AnalysisRunBuilder(self._spark_session, df)

File ~/cluster-env/trident_env/lib/python3.10/site-packages/pydeequ/analyzers.py:124, in AnalysisRunBuilder.__init__(self, spark_session, df)
    122 self._jspark_session = spark_session._jsparkSession
    123 self._df = df
--&amp;gt; 124 self._AnalysisRunBuilder = self._jvm.com.amazon.deequ.analyzers.runners.AnalysisRunBuilder(df._jdf)

TypeError: 'JavaPackage' object is not callable
 &lt;/LI-CODE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Did I miss any installation or setup or anything?&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
    <pubDate>Mon, 08 Jul 2024 07:10:30 GMT</pubDate>
    <dc:creator>russelp</dc:creator>
    <dc:date>2024-07-08T07:10:30Z</dc:date>
    <item>
      <title>Pydeequ - JavaPackage is not callable</title>
      <link>https://community.fabric.microsoft.com/t5/Data-Engineering/Pydeequ-JavaPackage-is-not-callable/m-p/4029535#M2881</link>
      <description>&lt;P&gt;Hi,&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;I'm trying to use PyDeequ and I am following the steps here -&amp;nbsp;&lt;A href="https://pydeequ.readthedocs.io/en/latest/README.html#installation" target="_blank"&gt;https://pydeequ.readthedocs.io/en/latest/README.html#installation&lt;/A&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;1.&amp;nbsp;&lt;/P&gt;&lt;LI-CODE lang="python"&gt;pip install pydeequ&lt;/LI-CODE&gt;&lt;P&gt;2.&lt;/P&gt;&lt;LI-CODE lang="python"&gt;import os

# Set the SPARK_VERSION environment variable
os.environ['SPARK_VERSION'] = '3.3'&lt;/LI-CODE&gt;&lt;P&gt;3.&amp;nbsp;&lt;/P&gt;&lt;LI-CODE lang="python"&gt;from pyspark.sql import SparkSession, Row
import pydeequ

spark = (SparkSession
    .builder
    .config("spark.jars.packages", pydeequ.deequ_maven_coord)
    .config("spark.jars.excludes", pydeequ.f2j_maven_coord)
    .getOrCreate())

df = spark.sparkContext.parallelize([
            Row(a="foo", b=1, c=5),
            Row(a="bar", b=2, c=6),
            Row(a="baz", b=3, c=None)]).toDF()&lt;/LI-CODE&gt;&lt;P&gt;4.&amp;nbsp;&lt;/P&gt;&lt;PRE&gt;&lt;SPAN class=""&gt;from &lt;SPAN class=""&gt;pydeequ.analyzers &lt;SPAN class=""&gt;import &lt;SPAN class=""&gt;*
&lt;SPAN class=""&gt;analysisResult &lt;SPAN class=""&gt;= &lt;SPAN class=""&gt;AnalysisRunner&lt;SPAN class=""&gt;(&lt;SPAN class=""&gt;spark&lt;SPAN class=""&gt;) \
                    &lt;SPAN class=""&gt;.&lt;SPAN class=""&gt;onData&lt;SPAN class=""&gt;(&lt;SPAN class=""&gt;df&lt;SPAN class=""&gt;) \
                    &lt;SPAN class=""&gt;.&lt;SPAN class=""&gt;addAnalyzer&lt;SPAN class=""&gt;(&lt;SPAN class=""&gt;Size&lt;SPAN class=""&gt;()) \
                    &lt;SPAN class=""&gt;.&lt;SPAN class=""&gt;addAnalyzer&lt;SPAN class=""&gt;(&lt;SPAN class=""&gt;Completeness&lt;SPAN class=""&gt;(&lt;SPAN class=""&gt;"b"&lt;SPAN class=""&gt;)) \
                    &lt;SPAN class=""&gt;.&lt;SPAN class=""&gt;run&lt;SPAN class=""&gt;()

&lt;SPAN class=""&gt;analysisResult_df &lt;SPAN class=""&gt;= &lt;SPAN class=""&gt;AnalyzerContext&lt;SPAN class=""&gt;.&lt;SPAN class=""&gt;successMetricsAsDataFrame&lt;SPAN class=""&gt;(&lt;SPAN class=""&gt;spark&lt;SPAN class=""&gt;, &lt;SPAN class=""&gt;analysisResult&lt;SPAN class=""&gt;)
&lt;SPAN class=""&gt;analysisResult_df&lt;SPAN class=""&gt;.&lt;SPAN class=""&gt;show&lt;SPAN class=""&gt;()&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/PRE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;I am getting the following error:&lt;/P&gt;&lt;LI-CODE lang="python"&gt;---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
Cell In[53], line 4
      1 from pydeequ.analyzers import *
      3 analysisResult = AnalysisRunner(spark) \
----&amp;gt; 4                     .onData(df) \
      5                     .addAnalyzer(Size()) \
      6                     .addAnalyzer(Completeness("b")) \
      7                     .run()
      9 analysisResult_df = AnalyzerContext.successMetricsAsDataFrame(spark, analysisResult)
     10 analysisResult_df.show()

File ~/cluster-env/trident_env/lib/python3.10/site-packages/pydeequ/analyzers.py:52, in AnalysisRunner.onData(self, df)
     46 """
     47 Starting point to construct an AnalysisRun.
     48 :param dataFrame df: tabular data on which the checks should be verified
     49 :return: new AnalysisRunBuilder object
     50 """
     51 df = ensure_pyspark_df(self._spark_session, df)
---&amp;gt; 52 return AnalysisRunBuilder(self._spark_session, df)

File ~/cluster-env/trident_env/lib/python3.10/site-packages/pydeequ/analyzers.py:124, in AnalysisRunBuilder.__init__(self, spark_session, df)
    122 self._jspark_session = spark_session._jsparkSession
    123 self._df = df
--&amp;gt; 124 self._AnalysisRunBuilder = self._jvm.com.amazon.deequ.analyzers.runners.AnalysisRunBuilder(df._jdf)

TypeError: 'JavaPackage' object is not callable
 &lt;/LI-CODE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Did I miss any installation or setup or anything?&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Mon, 08 Jul 2024 07:10:30 GMT</pubDate>
      <guid>https://community.fabric.microsoft.com/t5/Data-Engineering/Pydeequ-JavaPackage-is-not-callable/m-p/4029535#M2881</guid>
      <dc:creator>russelp</dc:creator>
      <dc:date>2024-07-08T07:10:30Z</dc:date>
    </item>
    <item>
      <title>Re: Pydeequ - JavaPackage is not callable</title>
      <link>https://community.fabric.microsoft.com/t5/Data-Engineering/Pydeequ-JavaPackage-is-not-callable/m-p/4031271#M2899</link>
      <description>&lt;P&gt;Hi &lt;a href="https://community.fabric.microsoft.com/t5/user/viewprofilepage/user-id/757085"&gt;@russelp&lt;/a&gt;&amp;nbsp;,&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;The "'JavaPackage' object is not callable" error message usually means that the used Java/Scala package was not found. This may mean that the Deequ library was not loaded correctly into the Spark session.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;There are several things you can check for the problem:&lt;/P&gt;
&lt;P&gt;Make sure you are using compatible versions of Spark and Deequ.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Make sure PyDeequ is correctly installed and up to date. This can be checked with the following command:&lt;/P&gt;
&lt;LI-CODE lang="markup"&gt;pip show pydeequ&lt;/LI-CODE&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;PyDeequ can be reinstalled with the following command:&lt;/P&gt;
&lt;LI-CODE lang="markup"&gt;pip install --upgrade pydeequ&lt;/LI-CODE&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Best Regards,&lt;BR /&gt;Yang&lt;BR /&gt;Community Support Team&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;If any post&amp;nbsp;&lt;STRONG&gt;&lt;EM&gt;helps&lt;/EM&gt;&lt;/STRONG&gt;, then please consider&amp;nbsp;&lt;STRONG&gt;&lt;EM&gt;accepting it as the solution&lt;/EM&gt;&lt;/STRONG&gt;&amp;nbsp;to help the other members find it more quickly.&lt;BR /&gt;If I have misunderstood your needs or you still have problems with it, please feel free to let us know.&amp;nbsp;&lt;STRONG&gt;&lt;EM&gt;Thanks a lot!&lt;/EM&gt;&lt;/STRONG&gt;&lt;/P&gt;</description>
      <pubDate>Tue, 09 Jul 2024 01:09:49 GMT</pubDate>
      <guid>https://community.fabric.microsoft.com/t5/Data-Engineering/Pydeequ-JavaPackage-is-not-callable/m-p/4031271#M2899</guid>
      <dc:creator>Anonymous</dc:creator>
      <dc:date>2024-07-09T01:09:49Z</dc:date>
    </item>
    <item>
      <title>Re: Pydeequ - JavaPackage is not callable</title>
      <link>https://community.fabric.microsoft.com/t5/Data-Engineering/Pydeequ-JavaPackage-is-not-callable/m-p/4035680#M2946</link>
      <description>&lt;P&gt;Hi&amp;nbsp;@Anonymous&amp;nbsp;,&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;We had the latest version of pydeequ and we managed to solve it by adding a spark property to the environment. See screenshot below.&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper lia-image-align-inline" image-alt="russelp_0-1720651892035.png" style="width: 400px;"&gt;&lt;img src="https://community.fabric.microsoft.com/t5/image/serverpage/image-id/1130075iDF3F5994D9CA34F0/image-size/medium?v=v2&amp;amp;px=400" role="button" title="russelp_0-1720651892035.png" alt="russelp_0-1720651892035.png" /&gt;&lt;/span&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Wed, 10 Jul 2024 22:52:35 GMT</pubDate>
      <guid>https://community.fabric.microsoft.com/t5/Data-Engineering/Pydeequ-JavaPackage-is-not-callable/m-p/4035680#M2946</guid>
      <dc:creator>russelp</dc:creator>
      <dc:date>2024-07-10T22:52:35Z</dc:date>
    </item>
    <item>
      <title>Re: Pydeequ - JavaPackage is not callable</title>
      <link>https://community.fabric.microsoft.com/t5/Data-Engineering/Pydeequ-JavaPackage-is-not-callable/m-p/4840801#M12683</link>
      <description>&lt;P&gt;I have installed the PyPI library in my environment; the latest version of pydeequ available is 1.5.0, and I added the Spark property&amp;nbsp;&lt;BR /&gt;com.amazon.deequ:deequ:1.5.0-spark-3.5. After adding this, I am not able to connect to the Spark session.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Thu, 02 Oct 2025 17:04:23 GMT</pubDate>
      <guid>https://community.fabric.microsoft.com/t5/Data-Engineering/Pydeequ-JavaPackage-is-not-callable/m-p/4840801#M12683</guid>
      <dc:creator>datalearner_1</dc:creator>
      <dc:date>2025-10-02T17:04:23Z</dc:date>
    </item>
  </channel>
</rss>

