Alealejandrooo committed on
Commit
605feb9
1 Parent(s): 7bbe80f

Updated main function

Browse files
Files changed (1) hide show
  1. process.py +58 -72
process.py CHANGED
@@ -4,88 +4,74 @@ import re
4
  from datetime import timedelta
5
 
6
 
7
- def process_data(files_mindbody, files_medserv, tolerance, progress=gr.Progress()):
8
-
9
- try:
10
- mindbody = load_data(files_mindbody)
11
- medserv = load_data(files_medserv)
12
- except Exception as e:
13
- print(f"An error occurred while loading data: {e}")
14
- return None
15
-
16
- try:
17
- # Remove multiple commas from the 'Client' column
18
- medserv['Client'] = medserv['Client'].str.replace(r',+', ',', regex=True)
19
- mindbody['Client'] = mindbody['Client'].str.replace(r',+', ',', regex=True)
20
-
21
- # Split 'Client' names into first name and last name components for both DataFrames
22
- medserv[['Last Name', 'First Name']] = medserv['Client'].str.split(',', expand=True)
23
- mindbody[['Last Name', 'First Name']] = mindbody['Client'].str.split(',', expand=True)
24
- except Exception as e:
25
- print(f"An error occurred while processing client names: {e}")
26
-
27
- try:
28
- # Split dates if they contain commas in the 'DOS' column of medserv
29
- medserv['DOS'] = medserv['DOS'].astype(str)
30
- medserv['DOS'] = medserv['DOS'].str.split(',')
31
- medserv = medserv.explode('DOS')
32
-
33
- # Attempt to convert dates using multiple formats
34
- formats_to_try = ['%d/%m/%Y', '%Y-%m-%d'] # Add more formats as needed
35
- for format_to_try in formats_to_try:
36
- try:
37
- medserv['DOS'] = pd.to_datetime(medserv['DOS'].str.strip(), format=format_to_try)
38
- break # Break out of loop if conversion succeeds
39
- except ValueError:
40
- continue # Continue to next format if conversion fails
41
- except Exception as e:
42
- print(f"An error occurred while processing dates in medserv: {e}")
43
 
 
44
  unmatched_rows = []
45
 
46
- try:
47
- rows = len(mindbody)
48
 
49
- # Iterate through each row in the mindbody DataFrame
50
- for idx in progress.tdqm(range(rows), desc='Analyzing files...'):
51
- # Extract relevant information from the current row
52
- date = mindbody.iloc[idx]['DOS']
53
- first_name = mindbody.iloc[idx]['First Name']
54
- last_name = mindbody.iloc[idx]['Last Name']
55
-
56
- # Define the range of dates to search for a match in medserv
57
- date_range = [date - timedelta(days=i) for i in range(tolerance, -tolerance-1, -1)]
58
- # Remove the time component from the dates in date_range
59
- date_range = [d.date() for d in date_range]
 
 
 
 
 
 
 
 
 
60
 
61
- # Filter medserv based on the date range and name criteria
62
- matches = medserv[((medserv['DOS'].dt.date.isin(date_range)) &
63
- ((medserv['First Name'].str.lower() == first_name.lower()) |
64
- (medserv['Last Name'].str.lower() == last_name.lower())))]
65
 
66
- # If no match is found, append the row to the unmatched_rows list
67
- if matches.empty:
68
- unmatched_rows.append(mindbody.iloc[idx])
69
- except Exception as e:
70
- print(f"An error occurred while analyzing files: {e}")
71
 
72
- try:
73
- # Create a DataFrame from the unmatched_rows list
74
- unmatched_df = pd.DataFrame(unmatched_rows, columns=mindbody.columns)
75
 
76
- # Specify the columns to include in the output Excel file
77
- columns_to_include = ['DOS', 'Client ID', 'Client', 'Sale ID', 'Item name', 'Location', 'Item Total']
78
 
79
- # Format the 'DOS' column to remove time part
80
- unmatched_df['DOS'] = unmatched_df['DOS'].dt.strftime('%d-%m-%Y')
81
 
82
- output_file_path = 'Comparison Results.xlsx'
83
- unmatched_df[columns_to_include].to_excel(output_file_path, index=False)
84
-
85
- return output_file_path
86
- except Exception as e:
87
- print(f"An error occurred while creating the output file: {e}")
88
- return None
89
 
90
 
91
 
 
4
  from datetime import timedelta
5
 
6
 
7
def process_data(files_mindbody, files_medserv, tollerance, progress=gr.Progress()):
    """Write the mindbody rows that have no counterpart in medserv to Excel.

    A mindbody row counts as matched when at least one medserv row has a
    'DOS' date within ``tollerance`` days (either side) of the mindbody
    'DOS' AND shares either the first name or the last name
    (case-insensitive). Rows without such a match are exported.

    Args:
        files_mindbody: Passed unchanged to ``load_data``; the result must
            provide at least the 'Client' and 'DOS' columns plus the
            exported columns listed below.
        files_medserv: Passed unchanged to ``load_data``; the result must
            provide 'Client' and 'DOS'. A 'DOS' cell may hold several
            comma-separated dates.
        tollerance: Number of days on each side of the mindbody date that
            still counts as a match. Used with ``range``, so it must be an
            integer. (The spelling is kept for caller compatibility.)
        progress: Object exposing ``tqdm(iterable, desc=...)``; used only to
            report loop progress.

    Returns:
        The path of the written workbook, 'Comparison Results.xlsx'.

    Raises:
        ValueError: If the mindbody dates are not in ``%d/%m/%Y`` format, or
            the medserv dates match none of the supported formats.
    """
    mindbody = load_data(files_mindbody)
    medserv = load_data(files_medserv)

    for frame in (medserv, mindbody):
        # Collapse runs of commas so "Doe,,John" splits like "Doe,John".
        frame['Client'] = frame['Client'].str.replace(r',+', ',', regex=True)
        # n=1 guarantees at most two parts and reindex guarantees exactly two
        # columns, so the assignment cannot fail on names with extra commas
        # or on data where no name contains a comma at all.
        name_parts = frame['Client'].str.split(',', n=1, expand=True)
        name_parts = name_parts.reindex(columns=[0, 1])
        frame['Last Name'] = name_parts[0]
        frame['First Name'] = name_parts[1]

    mindbody['DOS'] = pd.to_datetime(mindbody['DOS'], format='%d/%m/%Y')

    # One medserv row per date when a 'DOS' cell lists several dates.
    medserv['DOS'] = medserv['DOS'].astype(str).str.split(',')
    medserv = medserv.explode('DOS').reset_index(drop=True)

    raw_dates = medserv['DOS'].str.strip()
    supported_formats = ('%d/%m/%Y', '%Y-%m-%d')  # Add more formats as needed
    for date_format in supported_formats:
        try:
            medserv['DOS'] = pd.to_datetime(raw_dates, format=date_format)
            break
        except ValueError:
            continue
    else:
        # Fail here with a clear message instead of letting the later
        # `.dt` access blow up on a column that is still plain strings.
        raise ValueError(
            "Could not parse medserv 'DOS' values with any of the supported "
            f"formats: {', '.join(supported_formats)}"
        )

    # Loop-invariant lookups, computed once rather than once per mindbody row.
    medserv_dates = medserv['DOS'].dt.date
    medserv_first = medserv['First Name'].str.lower()
    medserv_last = medserv['Last Name'].str.lower()
    mindbody_first = mindbody['First Name'].str.lower()
    mindbody_last = mindbody['Last Name'].str.lower()
    day_offsets = [timedelta(days=i) for i in range(tollerance, -tollerance - 1, -1)]

    # Positions (not labels) of mindbody rows without a medserv match.
    unmatched_positions = []

    for idx in progress.tqdm(range(len(mindbody)), desc='Analyzing files...'):
        date = mindbody['DOS'].iloc[idx]
        first_name = mindbody_first.iloc[idx]
        last_name = mindbody_last.iloc[idx]

        # Calendar days (time component dropped) that count as a date match.
        date_window = [(date - offset).date() for offset in day_offsets]
        in_window = medserv_dates.isin(date_window)

        # A missing name component can never match; the original code
        # crashed on `.lower()` for such rows.
        same_first = (medserv_first == first_name) if pd.notna(first_name) else False
        same_last = (medserv_last == last_name) if pd.notna(last_name) else False

        if not (in_window & (same_first | same_last)).any():
            unmatched_positions.append(idx)

    # Selecting by position keeps the datetime dtype of 'DOS' even when the
    # selection is empty, so the formatting below also works when every row
    # was matched.
    unmatched_df = mindbody.iloc[unmatched_positions].copy()

    # Columns written to the output workbook.
    columns_to_include = ['DOS', 'Client ID', 'Client', 'Sale ID', 'Item name', 'Location', 'Item Total']

    # Render 'DOS' as a plain day-month-year string without a time part.
    unmatched_df['DOS'] = unmatched_df['DOS'].dt.strftime('%d-%m-%Y')

    output_file_path = 'Comparison Results.xlsx'
    unmatched_df[columns_to_include].to_excel(output_file_path, index=False)

    return output_file_path
 
74
 
 
 
 
 
 
 
 
75
 
76
 
77