rephrase / Paraphrase.py
Ketan1011's picture
Upload Paraphrase.py
efcf8db verified
raw
history blame
6.32 kB
from transformers import PegasusForConditionalGeneration, PegasusTokenizer
import torch
def chunk_text(text, max_length, tokenizer):
    """Split text into chunks of a specified maximum token length.

    Args:
        text: The string to tokenize.
        max_length: Maximum number of token ids per chunk; must be positive.
        tokenizer: Object whose ``encode(text, truncation=False)`` returns a
            sliceable sequence of token ids.

    Returns:
        A list of token-id slices in their original order. Every chunk holds
        exactly ``max_length`` ids except possibly the last one. The list is
        empty when encoding produces no tokens.

    Raises:
        ValueError: If ``max_length`` is not positive.
    """
    # A non-positive size would never consume the token list, so reject it
    # up front instead of looping forever.
    if max_length <= 0:
        raise ValueError(f"max_length must be positive, got {max_length!r}")

    tokens = tokenizer.encode(text, truncation=False)
    return [
        tokens[start:start + max_length]
        for start in range(0, len(tokens), max_length)
    ]
def adjust_lengths(paragraph_length):
    """Pick generation length limits from the size of the input.

    Args:
        paragraph_length: Size of the input (the caller passes a token count).

    Returns:
        A ``(max_length, min_length)`` tuple: ``(100, 50)`` for inputs
        shorter than 100, ``(300, 150)`` for inputs shorter than 500, and
        ``(600, 300)`` otherwise.
    """
    # Ordered (exclusive upper bound, limits) tiers; the first match wins.
    tiers = (
        (100, (100, 50)),    # Shorter paragraphs
        (500, (300, 150)),   # Medium-length paragraphs
    )
    for upper_bound, limits in tiers:
        if paragraph_length < upper_bound:
            return limits
    return 600, 300  # Longer paragraphs
def paraphrase_paragraph(paragraph, model_name='google/pegasus-multi_news'):
    """Rewrite ``paragraph`` with a Pegasus model and return the generated text.

    The paragraph is tokenized and split into chunks that fit the tokenizer's
    ``model_max_length``. Each chunk is run through beam-search generation
    (3 beams) and the decoded outputs are joined with single spaces.

    The model and tokenizer are loaded from ``model_name`` on every call.

    Args:
        paragraph: Text to rewrite.
        model_name: Checkpoint name passed to ``from_pretrained`` for both
            the model and the tokenizer.
            NOTE(review): the default name suggests a news summarization
            checkpoint; confirm it is the intended model for paraphrasing.

    Returns:
        The generated text of all chunks joined by spaces, or ``''`` when
        ``paragraph`` is empty or contains only whitespace.
    """
    # Nothing to rewrite: skip the expensive model load, and avoid forcing
    # the model to generate ``min_length`` tokens from an empty input.
    if not paragraph or not paragraph.strip():
        return ''

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = PegasusForConditionalGeneration.from_pretrained(model_name).to(device)
    tokenizer = PegasusTokenizer.from_pretrained(model_name, clean_up_tokenization_spaces=True)

    # Each chunk is decoded and re-encoded below, which adds the special
    # tokens again. Reserve room for them so truncation does not silently
    # drop the last content token of a full chunk.
    chunk_budget = max(1, tokenizer.model_max_length - tokenizer.num_special_tokens_to_add())
    chunks = chunk_text(paragraph, chunk_budget, tokenizer)

    # The chunks partition the full encoding of the paragraph, so their
    # combined size is the paragraph's token count (no second tokenization).
    paragraph_length = sum(len(chunk) for chunk in chunks)

    # Adjust max_length and min_length dynamically
    max_length, min_length = adjust_lengths(paragraph_length)

    paraphrased_chunks = []
    for chunk in chunks:
        # Decode chunk tokens back to text
        chunk_t = tokenizer.decode(chunk, skip_special_tokens=True)

        # A trailing chunk can consist only of special tokens and decode to
        # nothing; generating from it would append unrelated text.
        if not chunk_t.strip():
            continue

        # Tokenize the text chunk
        inputs = tokenizer(chunk_t, return_tensors='pt', padding=True, truncation=True).to(device)

        # Generate paraphrased text
        with torch.no_grad():  # Avoid gradient calculations for inference
            generated_ids = model.generate(
                **inputs,  # input_ids together with the attention mask
                max_length=max_length,  # Dynamically adjusted
                min_length=min_length,  # Dynamically adjusted
                num_beams=3,
                early_stopping=True,
            )

        paraphrased_chunks.append(
            tokenizer.decode(generated_ids[0], skip_special_tokens=True)
        )

    # Combine all paraphrased chunks
    return ' '.join(paraphrased_chunks)
# para = '''Commodity markets wrapped up a turbulent week (ended September 13) with a surge of optimism, driven by growing expectations of a jumbo rate cut in the upcoming FOMC (Federal Open Market Committee) meeting. The dollar fell below 101, and the yield on the US two-year Treasury note dropped to 3.56 percent, its lowest point since March 2023. This decline reflected renewed speculation about a significant 50 bps cut scheduled for September 18. The European Central Bank (ECB) announced a 25 bps rate cut, as anticipated. However, the ECB’s Governing Council moderated expectations for further reductions in October, emphasising a data-dependent, meeting-by-meeting approach to future monetary policy decisions, pushing Euro higher against the dollar. Swap traders are currently pricing in a 40 percent chance of a 50 basis point rate cut on September 18. '''
# Sample news text kept at module level as input for manual runs; the
# commented-out driver at the end of the file passes it to
# paraphrase_paragraph().
para = '''
Shares of Bajaj Housing Finance made a solid market debut on September 16 and ended with a huge premium of nearly 136 percent against the issue price of Rs 70.
The stock was listed at Rs 150 on the BSE and NSE, reflecting a jump of 114.28 percent from the issue price.
It zoomed 135.7 percent to settle at Rs 164.99 — its upper circuit limit — on the BSE.
At the NSE, shares of the firm surged 135.71 percent to settle at the upper circuit limit of Rs 165.
The company's market valuation stood at Rs 1,37,406.09 crore.
Arkade Developers IPO subscribed 5.8 times on day 1; retail portion subscribed 8 times Arkade Developers IPO subscribed 5.8 times on day 1; retail portion subscribed 8 times
In traded volume terms, 608.99 lakh shares of the firm were traded on the BSE and 6,367.27 lakh shares on the NSE during the session.
"Magnificent subscription demand by breaking all records got listed as per our expectation. We believe Bajaj brand always rewarded investors and housing finance business give similar opportunity to invest in one of India's leading players in the housing finance sector," Prashanth Tapse, Senior VP (Research) at Mehta Equities said.
The Rs 6,560-crore initial public offer of Bajaj Housing Finance received 63.60 times subscription on September 11, the last day of bidding, amid overwhelming participation from institutional buyers.
The initial share sale had a price band for the offer at Rs 66-70 per share.
The IPO had a fresh issue of equity shares of up to Rs 3,560 crore and an offer-for-sale (OFS) of equity shares worth Rs 3,000 crore by parent Bajaj Finance.
In the equity market, the 30-share BSE Sensex climbed 97.84 points or 0.12 per cent to settle at a new record peak of 82,988.78. During the day, it jumped 293.4 points or 0.35 per cent to hit a fresh lifetime intra-day high of 83,184.34.
The NSE Nifty went up by 27.25 points or 0.11 percent to settle at 25,383.75. During the day, the benchmark gained 89.2 points or 0.35 percent to hit a new intra-day record peak of 25,445.70.
The share sale was conducted to comply with the Reserve Bank of India's (RBI) regulations, which require upper-layer non-banking finance companies to be listed on stock exchanges by September 2025.
Proceeds from the fresh issue will be used to augment the company's capital base to meet future capital requirements.
It is a non-deposit-taking housing finance company registered with the National Housing Bank in September 2015.
The firm offers financial solutions for purchasing and renovating residential and commercial properties.
It has been identified and categorised as an "upper layer" NBFC by the RBI, and its comprehensive mortgage products include home loans, loans against property, lease rental discounting and developer financing.
'''
# response = paraphrase_paragraph(para)
# print(para)
# print('---'*30)
# print()
# print(response)
# print()
# print(len(response))