Delete clean_mshr19_NV.py
This commit is contained in:
parent
3f1eafe8d0
commit
84ab38b4de
@ -1,51 +0,0 @@
|
|||||||
import pandas as pd
|
|
||||||
import glob
|
|
||||||
import os
|
|
||||||
from datetime import datetime
|
|
||||||
|
|
||||||
# Suffix appended to every output workbook. It is also used to recognise the
# script's own output so a second run does not clean already-cleaned files.
CLEANED_SUFFIX = "_cleaned"

# Number of leading rows dropped from each sheet before any other processing.
LEADING_ROWS_TO_DROP = 9

# Rows whose first column equals this marker exactly are removed.
TOTAL_ROW_MARKER = "TỔNG"


def parse_date(text):
    """Validate a user-typed date and return the text to write into the sheet.

    Surrounding whitespace is trimmed and ``/`` separators are converted to
    ``-`` so that both ``2024/01/05`` and ``2024-01-05`` are accepted. The
    result is then checked against the ``%Y-%m-%d`` format.

    Args:
        text: Raw text typed by the user.

    Returns:
        The trimmed, hyphen-separated date text. Apart from trimming and the
        separator change it is returned as typed (no zero-padding is added).

    Raises:
        ValueError: If the text is not a valid year-month-day date.
    """
    normalized = text.strip().replace("/", "-")
    # Called only for validation; strptime raises ValueError on bad input.
    datetime.strptime(normalized, "%Y-%m-%d")
    return normalized


def clean_frame(df, date_str):
    """Return a cleaned copy of a header-less sheet.

    Steps, in order:
      1. Drop the first ``LEADING_ROWS_TO_DROP`` rows.
      2. Drop every row whose first column equals ``TOTAL_ROW_MARKER``.
      3. Append a column named ``"temp_col"`` whose first cell is the literal
         ``"getDate"`` and whose remaining cells all hold ``date_str``.

    Args:
        df: DataFrame read with ``header=None`` (every row is data).
        date_str: Date text to place in the appended column.

    Returns:
        A new DataFrame; ``df`` itself is not modified.
    """
    body = df.iloc[LEADING_ROWS_TO_DROP:, :]

    # .copy() makes the filtered result an independent frame, so adding a
    # column below never writes into a view of the original data.
    body = body[body.iloc[:, 0] != TOTAL_ROW_MARKER].copy()

    # The column label is irrelevant to the output because the file is saved
    # without a header row.
    body["temp_col"] = ""

    if not body.empty:
        col = body.columns.get_loc("temp_col")
        # The top surviving row acts as the header row of the output sheet.
        body.iloc[0, col] = "getDate"
        if body.shape[0] > 1:
            body.iloc[1:, col] = date_str

    return body


def list_input_files():
    """Return the .xls/.xlsx files in the current directory to process.

    Files that this script produced on an earlier run (base name ending in
    ``CLEANED_SUFFIX``) and Excel lock files (name starting with ``~$``) are
    skipped, with a message printed for each one.
    """
    selected = []
    for path in glob.glob("*.xls") + glob.glob("*.xlsx"):
        base_name = os.path.splitext(path)[0]
        if os.path.basename(path).startswith("~$") or base_name.endswith(CLEANED_SUFFIX):
            print(f"Skipping: {path}")
            continue
        selected.append(path)
    return selected


def main():
    """Prompt for a date, then clean every Excel file in the current directory."""
    # Keep asking until the user supplies a valid date.
    while True:
        raw = input("Please input the date in YYYY-MM-DD format: ")
        try:
            date_str = parse_date(raw)
        except ValueError:
            print("Invalid date format. Please try again (YYYY-MM-DD).")
        else:
            break

    for file_path in list_input_files():
        print(f"Processing: {file_path}")

        # Read the first sheet with no header; treat all rows as data.
        df = pd.read_excel(file_path, sheet_name=0, header=None)
        cleaned = clean_frame(df, date_str)

        base_name = os.path.splitext(file_path)[0]
        output_file = f"{base_name}{CLEANED_SUFFIX}.xlsx"

        # Save WITHOUT index or header so the first data row stays the first
        # row in Excel.
        cleaned.to_excel(output_file, index=False, header=False)

        print(f" --> Cleaned file saved as: {output_file}")


if __name__ == "__main__":
    main()
|
|
||||||
Loading…
x
Reference in New Issue
Block a user