How to fix Unresolved import 'src.transform' in python ?

Question

# transform.py

from datetime import date
from html import unescape

import pandas as pd
from bs4 import BeautifulSoup

class Transform:
    def __init__(self, df, config):
        self.df = df
        self.config = config

    def save(self, output_path):
        """
        Description:
            save the cleaned df into csv
        args:
            output_path: path to output.csv
        """
        self.df.to_csv(output_path, index=False)

    def html_format_remover(self):
        """
        Description:
            remove html entities/character references in COLUMNS_TO_REMOVE_HTML(config) columns
        """
        for col in self.config["COLUMNS_REMOVE_HTML"]:
            self.df[col] = (
                      self.df[col].apply(
                          lambda x: BeautifulSoup(
                              unescape(x), "html.parser"
                          ).text
                          if pd.notnull(x)
                          else x
                      )
                )
        return self.df

    def reformat_string(self):
        """
        Description:
            convert to lowercase and remove all special characters in all COLUMN_STR_TYPES(config) columns
            remove digits in product_name and product_description
        """
        for col in self.config["COLUMN_STR_TYPES"]:
            self.df[col] = self.df[col].astype(str)

            self.df[col] = self.df[col].apply(
                lambda x: x.lower()
            )

            if not col == "sku":
                self.df[col] = self.df[col].str.replace(
                        r"[^\w\s]|_",
                        "",
                        regex=True
                )

            if col == "product_name" or col == "product_description":
                self.df[col] = self.df[col].replace(
                        r'[\d]',
                        '',
                        regex=True
                    )
        return self.df

    def keep_unit_price_qyt_more_than_zero(self):
        """
        Description:
            Keep rows that have COLUMNS_KEEP_MORETHANZERO(config) greater than 0
        """
        for col in self.config["COLUMNS_KEEP_MORETHANZERO"]:
            self.df[col].fillna(0, inplace=True)
            self.df = self.df[self.df[col] > 0]
        return self.df

    def age_group(self):
        """
        Description:
            add age_group column that is derived from COLUMN_CONVERT_AGE(config) column
        """
        year = date.today().year

        yob = self.config["COLUMN_CONVERT_AGE"][0]
        self.df['age'] = self.df[yob].apply(
            lambda x: x
            if (pd.isnull(x) or x == '')
            else year-int(x)
        )
        self.df['age'] = pd.to_numeric(self.df['age'])
        self.df['age_group'] = pd.cut(
            self.df['age'],
            bins=self.config["AGE_BINS"],
            labels=self.config["AGE_LABELS"],
            right=False,
            include_lowest=True
        )
        self.df = self.df.drop(columns=['age'])
        return self.df

    def region(self):
        """
        Description:
            add region column that is derived from COLUMN_CONVERT_POSTAL(config) column
        """
        postal_sector = self.config["COLUMN_CONVERT_POSTAL"][0]
        self.df[postal_sector] = self.df[postal_sector].astype(str)
        self.df['region'] = self.df[postal_sector].apply(
            lambda x: x[:2]
            if not ((pd.isnull(x) or x == '') and len(x) == 2)
            else (x[:1]
                  if not ((pd.isnull(x) or x == '') and len(x) == 1)
                  else ''
                  )
        )
        DICT_REGION = {
            'north-east': [
                "53", "54", "55", "56", "57",
                "79",
                "80", "82"
            ],
            'north': [
                "69",
                "70", "71", "72", "73", "75", "76"
            ],
            'central': [
                "1", "2", "3", "4", "5", "6", "7", "8", "9",
                "01", "02", "03", "04", "05", "06", "07", "08", "09", "10",
                "14", "15", "16", "17", "18", "19",
                "20", "21", "22", "23", "24", "25", "26", "27", "28", "29",
                "30", "31", "32", "33", "34", "35", "36", "37", "38", "39",
                "40", "41",
                "58", "59",
                "77", "78"
            ],
            'west': [
                "11", "12", "13",
                "60", "61", "62", "63", "64", "65", "66", "67", "68"
            ],
            'east': [
                "42", "43", "44", "45", "46", "47", "48", "49",
                "50", "51", "52",
                "81"
            ]
        }
        mapping = {}
        for key, values in DICT_REGION.items():
            for item in values:
                mapping[item] = key
        self.df['region'] = self.df['region'].replace(mapping)
        self.df['region'] = self.df['region'].apply(
            lambda x: x
            if (
                    x == 'north-east' or
                    x == 'north' or
                    x == 'central' or
                    x == 'west' or
                    x == 'east'
            )
            else ""
        )
        return self.df

    def remove_spacing_from_sku(self):
        """
        Description:
            substitute COLUMN_SKU(config) spacing with "_"
            substitute COLUMN_SKU(config) starting with 0 with string "A"
        """
        sku = self.config["COLUMN_SKU"][0]
        self.df[sku] = self.df[sku].replace(
            r'[ ]',
            '_',
            regex=True
        )
        self.df[sku] = self.df[sku].apply(
            lambda x: "A" + x[1:]
            if x[0] == "0"
            else x
        )
        return self.df

    def combine_storeID_outlet_to_SKU(self):
        """
        Description:
            Appends COLUMN_STOREID(config) + COLUMN_OUTLET(config) to COLUMN_SKU(config) if same COLUMN_SKU(config), different stores
        """
        sku = self.config["COLUMN_SKU"][0]
        sid = self.config["COLUMN_STOREID"][0]
        outlet = self.config["COLUMN_OUTLET"][0]
        self.df[sku] = self.df[sid] + "_" + self.df[outlet] + "_" + self.df[sku]
        return self.df

    def combine_storeID_outlet_to_customerID(self):
        """
        Description:
            Appends COLUMN_STOREID(config) + COLUMN_OUTLET(config) to COLUMN_CUSTID(config) if same COLUMN_CUSTID(config), different stores
        """
        cid = self.config["COLUMN_CUSTID"][0]
        sid = self.config["COLUMN_STOREID"][0]
        outlet = self.config["COLUMN_OUTLET"][0]
        self.df[cid] = self.df[sid].astype(str) + "_" + self.df[outlet].astype(str)  + "_" + self.df[cid].astype(str)
        return self.df

    def remove_null_compulsory_data(self):
        """
        Description:
            Removes rows where data is null but schema does not allow nullable
            and is compulsory as per COLUMNS_NULL_COMPULSORY(config)
        """
        for col in self.config["COLUMNS_NULL_COMPULSORY"]:
            self.df.drop(
                self.df[
                    self.df[col].map(len) < 1
                ].index,
                inplace=True
            )

            self.df.drop(
                self.df[
                    self.df[col] == "nan"
                ].index,
                inplace=True
            )
            self.df.drop(
                self.df[
                    self.df[col] == " "
                ].index,
                inplace=True
            )
        return self.df

Hi All ,

I having warning in this code . can anyone check and can give some solution for ETL code.

Thanks in advance

karthik

Abhishek Saini · Answer

If the module or package is not in the same directory as your script, you need to make sure that Python can find it. You can do this by adding the path to the module's parent directory to the Python path using the sys.path or PYTHONPATH environment variable. For example, if your project structure looks like this: project/ src/ transform/ __init__.py some_module.py your_script.py