Add BankCleaner.py
This commit is contained in:
parent
96268df5df
commit
0050c529cc
46
BankCleaner.py
Normal file
46
BankCleaner.py
Normal file
@ -0,0 +1,46 @@
|
|||||||
|
"""Clean an exported Excel sheet: keep selected columns and tidy text/time.

Run as a script, it asks for an input workbook through a file dialog, keeps
only the columns listed in ``COLUMNS_TO_KEEP``, reformats the ``time`` column
as ``YYYY-MM-DD``, strips line breaks and ``#`` characters from ``text``, and
writes the result to a workbook chosen through a second dialog.
"""

import pandas as pd

# Columns retained in the cleaned output, in output order.
COLUMNS_TO_KEEP = ["text", "time", "url", "topReactionsCount", "viewsCount", "likes"]


def clean_dataframe(df: pd.DataFrame) -> pd.DataFrame:
    """Return a cleaned copy of *df* restricted to ``COLUMNS_TO_KEEP``.

    The input frame is left untouched.

    Args:
        df: Frame containing at least the columns in ``COLUMNS_TO_KEEP``.

    Returns:
        A new frame with only those columns, where ``time`` is formatted as
        ``YYYY-MM-DD`` and ``text`` has runs of line breaks collapsed to a
        single space and every ``#`` removed.

    Raises:
        KeyError: If any required column is missing from *df*.
    """
    missing = [name for name in COLUMNS_TO_KEEP if name not in df.columns]
    if missing:
        # Fail early with the offending names instead of pandas' generic
        # "not in index" error.
        raise KeyError(
            f"Input file is missing required column(s): {', '.join(missing)}"
        )

    # Explicit copy so the column assignments below never target a slice of
    # the caller's frame.
    cleaned = df[COLUMNS_TO_KEEP].copy()

    cleaned["time"] = pd.to_datetime(cleaned["time"]).dt.strftime("%Y-%m-%d")

    # Collapse each run of CR/LF characters into one space, then drop "#".
    cleaned["text"] = cleaned["text"].replace(r"[\r\n]+", " ", regex=True)
    cleaned["text"] = cleaned["text"].replace(r"#", "", regex=True)
    return cleaned


def main() -> None:
    """Prompt for input/output workbooks and write the cleaned data."""
    # Imported here so clean_dataframe() stays usable where Tk or a display
    # is not available.
    import tkinter as tk
    from tkinter import filedialog

    # The dialogs need a Tk root; keep it hidden so only the dialogs show.
    root = tk.Tk()
    root.withdraw()
    try:
        input_file_path = filedialog.askopenfilename(
            title="Select the Excel file",
            filetypes=[("Excel files", "*.xlsx *.xls"), ("All files", "*.*")],
        )
        if not input_file_path:
            print("No file selected. Exiting.")
            return

        # Read every cell as a string so values are not reinterpreted.
        df = pd.read_excel(input_file_path, dtype=str)

        try:
            df = clean_dataframe(df)
        except KeyError as err:
            # Exit with a readable message rather than a traceback.
            raise SystemExit(err.args[0]) from err

        output_file_path = filedialog.asksaveasfilename(
            title="Save Cleaned Excel File",
            defaultextension=".xlsx",
            filetypes=[("Excel files", "*.xlsx"), ("All files", "*.*")],
        )
        if not output_file_path:
            print("No output file specified. Exiting.")
            return

        df.to_excel(output_file_path, index=False)
        print(f"Cleaned Excel file saved to: {output_file_path}")
    finally:
        # Release the hidden root window on every exit path.
        root.destroy()


if __name__ == "__main__":
    main()
|
||||||
Loading…
x
Reference in New Issue
Block a user