356 lines
13 KiB
Python
356 lines
13 KiB
Python
import mysql.connector
|
|
from mysql.connector import Error
|
|
import pandas as pd
|
|
import logging
|
|
from datetime import datetime
|
|
import time
|
|
|
|
# Configure logging
|
|
logging.basicConfig(
|
|
level=logging.INFO,
|
|
format='%(asctime)s - %(levelname)s - %(message)s',
|
|
handlers=[
|
|
logging.FileHandler(f'magento_queries_{datetime.now().strftime("%Y%m%d_%H%M%S")}.log'),
|
|
logging.StreamHandler()
|
|
]
|
|
)
|
|
logger = logging.getLogger(__name__)
|
|
|
|
def create_db_connection(host_name, port, user_name, user_password, db_name, max_retries=3):
|
|
connection = None
|
|
retry_count = 0
|
|
|
|
while retry_count < max_retries:
|
|
try:
|
|
logger.info(f"Attempting to connect to database {db_name} at {host_name}:{port} (Attempt {retry_count + 1}/{max_retries})")
|
|
connection = mysql.connector.connect(
|
|
host=host_name,
|
|
port=port,
|
|
user=user_name,
|
|
passwd=user_password,
|
|
database=db_name,
|
|
connection_timeout=180, # 3 minutes timeout
|
|
pool_size=5, # Enable connection pooling
|
|
pool_name="magento_pool"
|
|
)
|
|
logger.info("MySQL Database connection successful")
|
|
return connection
|
|
except Error as err:
|
|
retry_count += 1
|
|
logger.error(f"Failed to connect to database (Attempt {retry_count}/{max_retries}): {err}")
|
|
if retry_count < max_retries:
|
|
wait_time = 2 ** retry_count # Exponential backoff
|
|
logger.info(f"Waiting {wait_time} seconds before retrying...")
|
|
time.sleep(wait_time)
|
|
else:
|
|
print(f"Error: '{err}'")
|
|
return None
|
|
|
|
def execute_query(connection, query, question_number, question_text, max_retries=3):
|
|
retry_count = 0
|
|
while retry_count < max_retries:
|
|
try:
|
|
cursor = connection.cursor(dictionary=True)
|
|
results = None
|
|
logger.info(f"Executing Question {question_number}: {question_text}")
|
|
logger.debug(f"SQL Query:\n{query}")
|
|
|
|
start_time = datetime.now()
|
|
cursor.execute(query)
|
|
execution_time = (datetime.now() - start_time).total_seconds()
|
|
logger.info(f"Query executed in {execution_time:.2f} seconds")
|
|
|
|
results = cursor.fetchall()
|
|
logger.info(f"Retrieved {len(results) if results else 0} rows")
|
|
|
|
print(f"\n--- Question {question_number} ---")
|
|
print(f"Question: {question_text}")
|
|
print(f"SQL:\n{query}")
|
|
print("Output:")
|
|
|
|
if results:
|
|
df = pd.DataFrame(results)
|
|
print(df.to_string())
|
|
logger.info(f"Results displayed successfully")
|
|
else:
|
|
print("No results found or query was an UPDATE/DELETE type.")
|
|
logger.info("Query returned no results")
|
|
|
|
if cursor.rowcount > -1 and not results:
|
|
logger.info(f"Rows affected: {cursor.rowcount}")
|
|
print(f"Rows affected: {cursor.rowcount}")
|
|
|
|
cursor.close()
|
|
logger.debug("Cursor closed")
|
|
return results
|
|
|
|
except Error as err:
|
|
retry_count += 1
|
|
logger.error(f"Error executing query (Attempt {retry_count}/{max_retries}): {err}")
|
|
|
|
if retry_count < max_retries:
|
|
wait_time = 2 ** retry_count # Exponential backoff
|
|
logger.info(f"Waiting {wait_time} seconds before retrying...")
|
|
time.sleep(wait_time)
|
|
|
|
# Try to reconnect if connection is lost
|
|
try:
|
|
connection.ping(reconnect=True, attempts=3, delay=5)
|
|
logger.info("Successfully reconnected to database")
|
|
except Error as reconnect_err:
|
|
logger.error(f"Failed to reconnect: {reconnect_err}")
|
|
connection = create_db_connection(DB_HOST, DB_PORT, DB_USER, DB_PASS, DB_NAME)
|
|
if not connection:
|
|
raise Exception("Failed to reestablish database connection")
|
|
else:
|
|
print(f"Error executing query: '{err}'")
|
|
raise
|
|
|
|
# --- Database Configuration ---
|
|
DB_HOST = "localhost"
|
|
DB_USER = "root"
|
|
DB_PORT = 23306
|
|
DB_PASS = "1234567890"
|
|
DB_NAME = "magentodb"
|
|
|
|
# --- Questions and SQL Queries ---
|
|
# Note: For EAV attributes, we often need to find the attribute_id first.
|
|
# Common entity_type_ids: customer=1, customer_address=2, catalog_category=3, catalog_product=4
|
|
# Common store_id for global/admin values is 0.
|
|
|
|
questions_and_queries = [
|
|
(
|
|
"Identify the product (SKU and Name) with the highest 'price' attribute value in the default store scope.",
|
|
"""
|
|
SELECT
|
|
cpe.sku,
|
|
cpev.value AS product_name,
|
|
cped.value AS price
|
|
FROM
|
|
catalog_product_entity cpe
|
|
JOIN
|
|
catalog_product_entity_decimal cped ON cpe.entity_id = cped.entity_id
|
|
JOIN
|
|
eav_attribute ea_price ON cped.attribute_id = ea_price.attribute_id AND ea_price.attribute_code = 'price'
|
|
AND ea_price.entity_type_id = (SELECT entity_type_id FROM eav_entity_type WHERE entity_type_code = 'catalog_product')
|
|
JOIN
|
|
catalog_product_entity_varchar cpev ON cpe.entity_id = cpev.entity_id
|
|
JOIN
|
|
eav_attribute ea_name ON cpev.attribute_id = ea_name.attribute_id AND ea_name.attribute_code = 'name'
|
|
AND ea_name.entity_type_id = (SELECT entity_type_id FROM eav_entity_type WHERE entity_type_code = 'catalog_product')
|
|
WHERE
|
|
cped.store_id = 0 AND cpev.store_id = 0 -- Default store scope
|
|
ORDER BY
|
|
cped.value DESC
|
|
LIMIT 1;
|
|
"""
|
|
),
|
|
(
|
|
"List all customers (First Name, Last Name, Email) who have not placed any orders.",
|
|
"""
|
|
SELECT
|
|
ce.firstname,
|
|
ce.lastname,
|
|
ce.email
|
|
FROM
|
|
customer_entity ce
|
|
LEFT JOIN
|
|
sales_order so ON ce.entity_id = so.customer_id
|
|
WHERE
|
|
so.entity_id IS NULL;
|
|
"""
|
|
),
|
|
(
|
|
"What is the total number of 'simple' products currently marked as 'Out of Stock' (status = 0) in the 'default' inventory source?",
|
|
"""
|
|
SELECT COUNT(DISTINCT isi.sku) AS total_out_of_stock_simple_products
|
|
FROM inventory_source_item isi
|
|
JOIN catalog_product_entity cpe ON isi.sku = cpe.sku
|
|
WHERE
|
|
isi.source_code = 'default'
|
|
AND isi.status = 0 -- 0 for Out of Stock, 1 for In Stock
|
|
AND cpe.type_id = 'simple';
|
|
"""
|
|
),
|
|
(
|
|
"Find the top 3 customer groups by the total 'grand_total' of orders placed. Show group name and total amount.",
|
|
"""
|
|
SELECT
|
|
cg.customer_group_code,
|
|
SUM(so.grand_total) AS total_order_amount
|
|
FROM
|
|
sales_order so
|
|
JOIN
|
|
customer_group cg ON so.customer_group_id = cg.customer_group_id
|
|
GROUP BY
|
|
cg.customer_group_code
|
|
ORDER BY
|
|
total_order_amount DESC
|
|
LIMIT 3;
|
|
"""
|
|
),
|
|
(
|
|
"Retrieve the SKU, name, and creation date of the 5 oldest products that are still 'enabled' (status attribute value = 1).",
|
|
"""
|
|
SELECT
|
|
p.sku,
|
|
name_val.value AS product_name,
|
|
p.created_at
|
|
FROM
|
|
catalog_product_entity p
|
|
JOIN
|
|
eav_attribute e_status ON e_status.attribute_code = 'status'
|
|
AND e_status.entity_type_id = (SELECT entity_type_id FROM eav_entity_type WHERE entity_type_code = 'catalog_product')
|
|
JOIN
|
|
catalog_product_entity_int status_val ON status_val.attribute_id = e_status.attribute_id
|
|
AND status_val.entity_id = p.entity_id
|
|
AND status_val.store_id = 0 -- Check default scope status
|
|
JOIN
|
|
eav_attribute e_name ON e_name.attribute_code = 'name'
|
|
AND e_name.entity_type_id = (SELECT entity_type_id FROM eav_entity_type WHERE entity_type_code = 'catalog_product')
|
|
JOIN
|
|
catalog_product_entity_varchar name_val ON name_val.attribute_id = e_name.attribute_id
|
|
AND name_val.entity_id = p.entity_id
|
|
AND name_val.store_id = 0 -- Name from default store view
|
|
WHERE
|
|
status_val.value = 1 -- 1 for Enabled
|
|
ORDER BY
|
|
p.created_at ASC
|
|
LIMIT 5;
|
|
"""
|
|
),
|
|
(
|
|
"List all URL rewrites of type 'product' that are NOT autogenerated and their target paths for store_id 1.",
|
|
"""
|
|
SELECT
|
|
request_path,
|
|
target_path,
|
|
description
|
|
FROM
|
|
url_rewrite
|
|
WHERE
|
|
entity_type = 'product'
|
|
AND is_autogenerated = 0
|
|
AND store_id = 1;
|
|
"""
|
|
),
|
|
(
|
|
"How many distinct customers placed orders in January 2023?",
|
|
"""
|
|
SELECT
|
|
COUNT(DISTINCT customer_id) AS distinct_customers_jan_2023
|
|
FROM
|
|
sales_order
|
|
WHERE
|
|
created_at >= '2023-01-01 00:00:00'
|
|
AND created_at < '2023-02-01 00:00:00'
|
|
AND customer_id IS NOT NULL; -- Only count registered customers
|
|
"""
|
|
),
|
|
(
|
|
"Identify orders (Increment ID and Grand Total) placed by customers with ' VIP ' in their customer group code, and the order status is 'complete'.",
|
|
"""
|
|
SELECT
|
|
so.increment_id,
|
|
so.grand_total,
|
|
cg.customer_group_code
|
|
FROM
|
|
sales_order so
|
|
JOIN
|
|
customer_group cg ON so.customer_group_id = cg.customer_group_id
|
|
WHERE
|
|
cg.customer_group_code LIKE '%VIP%'
|
|
AND so.status = 'complete';
|
|
"""
|
|
),
|
|
(
|
|
"Find the 3rd most recent review. Provide the review ID, nickname, title, and detail.",
|
|
"""
|
|
WITH RankedReviews AS (
|
|
SELECT
|
|
r.review_id,
|
|
rd.nickname,
|
|
rd.title,
|
|
rd.detail,
|
|
r.created_at,
|
|
DENSE_RANK() OVER (ORDER BY r.created_at DESC) as review_rank
|
|
FROM
|
|
review r
|
|
JOIN
|
|
review_detail rd ON r.review_id = rd.review_id
|
|
)
|
|
SELECT
|
|
review_id,
|
|
nickname,
|
|
title,
|
|
detail,
|
|
created_at
|
|
FROM
|
|
RankedReviews
|
|
WHERE
|
|
review_rank = 3;
|
|
""",
|
|
),
|
|
(
|
|
"List categories (Name and Path) that have no products assigned to them.",
|
|
"""
|
|
SELECT
|
|
cce.entity_id,
|
|
ccev.value AS category_name,
|
|
cce.path
|
|
FROM
|
|
catalog_category_entity cce
|
|
JOIN
|
|
eav_attribute ea_name ON ea_name.attribute_code = 'name'
|
|
AND ea_name.entity_type_id = (SELECT entity_type_id FROM eav_entity_type WHERE entity_type_code = 'catalog_category')
|
|
JOIN
|
|
catalog_category_entity_varchar ccev ON ccev.attribute_id = ea_name.attribute_id
|
|
AND ccev.entity_id = cce.entity_id
|
|
AND ccev.store_id = 0 -- Default store name
|
|
LEFT JOIN
|
|
catalog_category_product ccp ON cce.entity_id = ccp.category_id
|
|
WHERE
|
|
ccp.product_id IS NULL
|
|
AND cce.children_count = 0; -- Optionally, only leaf categories with no products
|
|
"""
|
|
),
|
|
(
|
|
"Which payment methods have been used for orders with a grand_total greater than $500 in the last 6 months? Show method and count of orders.",
|
|
# Assuming current date is around May 2024 for "last 6 months"
|
|
"""
|
|
SELECT
|
|
sop.method,
|
|
COUNT(DISTINCT so.entity_id) AS order_count
|
|
FROM
|
|
sales_order so
|
|
JOIN
|
|
sales_order_payment sop ON so.entity_id = sop.parent_id
|
|
WHERE
|
|
so.grand_total > 500
|
|
AND so.created_at >= DATE_SUB(CURDATE(), INTERVAL 6 MONTH)
|
|
GROUP BY
|
|
sop.method
|
|
ORDER BY
|
|
order_count DESC;
|
|
"""
|
|
)
|
|
]
|
|
|
|
if __name__ == '__main__':
|
|
logger.info("Initializing database connection")
|
|
connection = create_db_connection(DB_HOST, DB_PORT, DB_USER, DB_PASS, DB_NAME)
|
|
if connection:
|
|
try:
|
|
logger.info(f"Executing {len(questions_and_queries)} queries")
|
|
for i, (question, query) in enumerate(questions_and_queries):
|
|
try:
|
|
execute_query(connection, query, i + 1, question)
|
|
except Exception as e:
|
|
logger.error(f"Failed to execute query {i + 1}: {str(e)}")
|
|
continue # Continue with next query even if one fails
|
|
finally:
|
|
connection.close()
|
|
logger.info("MySQL connection closed")
|
|
print("\nMySQL connection is closed")
|
|
else:
|
|
logger.error("Failed to establish database connection") |