import mysql.connector from mysql.connector import Error import pandas as pd import logging from datetime import datetime import time # Configure logging logging.basicConfig( level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s', handlers=[ logging.FileHandler(f'magento_queries_{datetime.now().strftime("%Y%m%d_%H%M%S")}.log'), logging.StreamHandler() ] ) logger = logging.getLogger(__name__) def create_db_connection(host_name, port, user_name, user_password, db_name, max_retries=3): connection = None retry_count = 0 while retry_count < max_retries: try: logger.info(f"Attempting to connect to database {db_name} at {host_name}:{port} (Attempt {retry_count + 1}/{max_retries})") connection = mysql.connector.connect( host=host_name, port=port, user=user_name, passwd=user_password, database=db_name, connection_timeout=180, # 3 minutes timeout pool_size=5, # Enable connection pooling pool_name="magento_pool" ) logger.info("MySQL Database connection successful") return connection except Error as err: retry_count += 1 logger.error(f"Failed to connect to database (Attempt {retry_count}/{max_retries}): {err}") if retry_count < max_retries: wait_time = 2 ** retry_count # Exponential backoff logger.info(f"Waiting {wait_time} seconds before retrying...") time.sleep(wait_time) else: print(f"Error: '{err}'") return None def execute_query(connection, query, question_number, question_text, max_retries=3): retry_count = 0 while retry_count < max_retries: try: cursor = connection.cursor(dictionary=True) results = None logger.info(f"Executing Question {question_number}: {question_text}") logger.debug(f"SQL Query:\n{query}") start_time = datetime.now() cursor.execute(query) execution_time = (datetime.now() - start_time).total_seconds() logger.info(f"Query executed in {execution_time:.2f} seconds") results = cursor.fetchall() logger.info(f"Retrieved {len(results) if results else 0} rows") print(f"\n--- Question {question_number} ---") print(f"Question: {question_text}") print(f"SQL:\n{query}") print("Output:") if results: df = pd.DataFrame(results) print(df.to_string()) logger.info(f"Results displayed successfully") else: print("No results found or query was an UPDATE/DELETE type.") logger.info("Query returned no results") if cursor.rowcount > -1 and not results: logger.info(f"Rows affected: {cursor.rowcount}") print(f"Rows affected: {cursor.rowcount}") cursor.close() logger.debug("Cursor closed") return results except Error as err: retry_count += 1 logger.error(f"Error executing query (Attempt {retry_count}/{max_retries}): {err}") if retry_count < max_retries: wait_time = 2 ** retry_count # Exponential backoff logger.info(f"Waiting {wait_time} seconds before retrying...") time.sleep(wait_time) # Try to reconnect if connection is lost try: connection.ping(reconnect=True, attempts=3, delay=5) logger.info("Successfully reconnected to database") except Error as reconnect_err: logger.error(f"Failed to reconnect: {reconnect_err}") connection = create_db_connection(DB_HOST, DB_PORT, DB_USER, DB_PASS, DB_NAME) if not connection: raise Exception("Failed to reestablish database connection") else: print(f"Error executing query: '{err}'") raise # --- Database Configuration --- DB_HOST = "localhost" DB_USER = "root" DB_PORT = 23306 DB_PASS = "1234567890" DB_NAME = "magentodb" # --- Questions and SQL Queries --- # Note: For EAV attributes, we often need to find the attribute_id first. # Common entity_type_ids: customer=1, customer_address=2, catalog_category=3, catalog_product=4 # Common store_id for global/admin values is 0. questions_and_queries = [ ( "Identify the product (SKU and Name) with the highest 'price' attribute value in the default store scope.", """ SELECT cpe.sku, cpev.value AS product_name, cped.value AS price FROM catalog_product_entity cpe JOIN catalog_product_entity_decimal cped ON cpe.entity_id = cped.entity_id JOIN eav_attribute ea_price ON cped.attribute_id = ea_price.attribute_id AND ea_price.attribute_code = 'price' AND ea_price.entity_type_id = (SELECT entity_type_id FROM eav_entity_type WHERE entity_type_code = 'catalog_product') JOIN catalog_product_entity_varchar cpev ON cpe.entity_id = cpev.entity_id JOIN eav_attribute ea_name ON cpev.attribute_id = ea_name.attribute_id AND ea_name.attribute_code = 'name' AND ea_name.entity_type_id = (SELECT entity_type_id FROM eav_entity_type WHERE entity_type_code = 'catalog_product') WHERE cped.store_id = 0 AND cpev.store_id = 0 -- Default store scope ORDER BY cped.value DESC LIMIT 1; """ ), ( "List all customers (First Name, Last Name, Email) who have not placed any orders.", """ SELECT ce.firstname, ce.lastname, ce.email FROM customer_entity ce LEFT JOIN sales_order so ON ce.entity_id = so.customer_id WHERE so.entity_id IS NULL; """ ), ( "What is the total number of 'simple' products currently marked as 'Out of Stock' (status = 0) in the 'default' inventory source?", """ SELECT COUNT(DISTINCT isi.sku) AS total_out_of_stock_simple_products FROM inventory_source_item isi JOIN catalog_product_entity cpe ON isi.sku = cpe.sku WHERE isi.source_code = 'default' AND isi.status = 0 -- 0 for Out of Stock, 1 for In Stock AND cpe.type_id = 'simple'; """ ), ( "Find the top 3 customer groups by the total 'grand_total' of orders placed. Show group name and total amount.", """ SELECT cg.customer_group_code, SUM(so.grand_total) AS total_order_amount FROM sales_order so JOIN customer_group cg ON so.customer_group_id = cg.customer_group_id GROUP BY cg.customer_group_code ORDER BY total_order_amount DESC LIMIT 3; """ ), ( "Retrieve the SKU, name, and creation date of the 5 oldest products that are still 'enabled' (status attribute value = 1).", """ SELECT p.sku, name_val.value AS product_name, p.created_at FROM catalog_product_entity p JOIN eav_attribute e_status ON e_status.attribute_code = 'status' AND e_status.entity_type_id = (SELECT entity_type_id FROM eav_entity_type WHERE entity_type_code = 'catalog_product') JOIN catalog_product_entity_int status_val ON status_val.attribute_id = e_status.attribute_id AND status_val.entity_id = p.entity_id AND status_val.store_id = 0 -- Check default scope status JOIN eav_attribute e_name ON e_name.attribute_code = 'name' AND e_name.entity_type_id = (SELECT entity_type_id FROM eav_entity_type WHERE entity_type_code = 'catalog_product') JOIN catalog_product_entity_varchar name_val ON name_val.attribute_id = e_name.attribute_id AND name_val.entity_id = p.entity_id AND name_val.store_id = 0 -- Name from default store view WHERE status_val.value = 1 -- 1 for Enabled ORDER BY p.created_at ASC LIMIT 5; """ ), ( "List all URL rewrites of type 'product' that are NOT autogenerated and their target paths for store_id 1.", """ SELECT request_path, target_path, description FROM url_rewrite WHERE entity_type = 'product' AND is_autogenerated = 0 AND store_id = 1; """ ), ( "How many distinct customers placed orders in January 2023?", """ SELECT COUNT(DISTINCT customer_id) AS distinct_customers_jan_2023 FROM sales_order WHERE created_at >= '2023-01-01 00:00:00' AND created_at < '2023-02-01 00:00:00' AND customer_id IS NOT NULL; -- Only count registered customers """ ), ( "Identify orders (Increment ID and Grand Total) placed by customers with ' VIP ' in their customer group code, and the order status is 'complete'.", """ SELECT so.increment_id, so.grand_total, cg.customer_group_code FROM sales_order so JOIN customer_group cg ON so.customer_group_id = cg.customer_group_id WHERE cg.customer_group_code LIKE '%VIP%' AND so.status = 'complete'; """ ), ( "Find the 3rd most recent review. Provide the review ID, nickname, title, and detail.", """ WITH RankedReviews AS ( SELECT r.review_id, rd.nickname, rd.title, rd.detail, r.created_at, DENSE_RANK() OVER (ORDER BY r.created_at DESC) as review_rank FROM review r JOIN review_detail rd ON r.review_id = rd.review_id ) SELECT review_id, nickname, title, detail, created_at FROM RankedReviews WHERE review_rank = 3; """, ), ( "List categories (Name and Path) that have no products assigned to them.", """ SELECT cce.entity_id, ccev.value AS category_name, cce.path FROM catalog_category_entity cce JOIN eav_attribute ea_name ON ea_name.attribute_code = 'name' AND ea_name.entity_type_id = (SELECT entity_type_id FROM eav_entity_type WHERE entity_type_code = 'catalog_category') JOIN catalog_category_entity_varchar ccev ON ccev.attribute_id = ea_name.attribute_id AND ccev.entity_id = cce.entity_id AND ccev.store_id = 0 -- Default store name LEFT JOIN catalog_category_product ccp ON cce.entity_id = ccp.category_id WHERE ccp.product_id IS NULL AND cce.children_count = 0; -- Optionally, only leaf categories with no products """ ), ( "Which payment methods have been used for orders with a grand_total greater than $500 in the last 6 months? Show method and count of orders.", # Assuming current date is around May 2024 for "last 6 months" """ SELECT sop.method, COUNT(DISTINCT so.entity_id) AS order_count FROM sales_order so JOIN sales_order_payment sop ON so.entity_id = sop.parent_id WHERE so.grand_total > 500 AND so.created_at >= DATE_SUB(CURDATE(), INTERVAL 6 MONTH) GROUP BY sop.method ORDER BY order_count DESC; """ ) ] if __name__ == '__main__': logger.info("Initializing database connection") connection = create_db_connection(DB_HOST, DB_PORT, DB_USER, DB_PASS, DB_NAME) if connection: try: logger.info(f"Executing {len(questions_and_queries)} queries") for i, (question, query) in enumerate(questions_and_queries): try: execute_query(connection, query, i + 1, question) except Exception as e: logger.error(f"Failed to execute query {i + 1}: {str(e)}") continue # Continue with next query even if one fails finally: connection.close() logger.info("MySQL connection closed") print("\nMySQL connection is closed") else: logger.error("Failed to establish database connection")