Skip to content

Commit

Permalink
auto-collating
Browse files Browse the repository at this point in the history
  • Loading branch information
tomyoung903 committed Jun 13, 2024
1 parent c93c7d5 commit 6c34b68
Showing 1 changed file with 18 additions and 0 deletions.
18 changes: 18 additions & 0 deletions tools/caption/caption_llama3.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,24 @@ def extract_batch(texts, prompt):
row = batch[idx]
writer.writerow([*row, keywords])
dist.barrier()

# Only rank 0 merges the per-rank CSV outputs into a single collated CSV.
if dist.get_rank() == 0:
    # NOTE(review): this message reports `output_file`, while the merged data
    # is written to `collated_file` below — confirm which path should be logged.
    print("All ranks are finished. Collating the processed data to {}".format(output_file))
    import pandas as pd

    # Every per-rank file and the collated file share the input path's stem.
    input_stem = os.path.splitext(args.input)[0]
    rank_csv_paths = [input_stem + f"_rank{i}" + "_llama3.csv" for i in range(dist.get_world_size())]

    # Load each rank's CSV and stack them in rank order with a fresh row index.
    merged = pd.concat([pd.read_csv(path) for path in rank_csv_paths], ignore_index=True)

    # Write the merged table next to the input file, without the index column.
    collated_file = input_stem + "_llama3.csv"
    merged.to_csv(collated_file, index=False)
    print("Collated data saved to {}".format(collated_file))
# terminate distributed env
dist.destroy_process_group()

Expand Down

0 comments on commit 6c34b68

Please sign in to comment.