Author
Mamayusupov Rifat.
Usage
import torch
from transformers import SeamlessM4TFeatureExtractor, Wav2Vec2BertProcessor, Wav2Vec2CTCTokenizer, Wav2Vec2BertForCTC
from transformers import pipeline
# Checkpoint the CTC model is loaded from.
# NOTE(review): the original snippet read an undefined `args.pretrained_model`;
# this card's repo id is used as the presumed intent — confirm, or point this
# at a local checkpoint directory instead.
PRETRAINED_MODEL = "blackhole33/wev2vec-commonVoice_v1"

# Run on the GPU when one is available, otherwise fall back to the CPU
# instead of crashing on a hard-coded "cuda".
device = "cuda" if torch.cuda.is_available() else "cpu"

# Initialize tokenizer (vocabulary is read from a local directory).
tokenizer = Wav2Vec2CTCTokenizer.from_pretrained(
    "/home/rifat/asr",
    unk_token="[UNK]",
    pad_token="[PAD]",
    word_delimiter_token="|",
)

# Initialize feature extractor
feature_extractor = SeamlessM4TFeatureExtractor(
    feature_size=80,
    num_mel_bins=80,
    sampling_rate=16000,
    padding_value=0.0,
)

# Initialize processor (bundles the feature extractor and the tokenizer)
processor = Wav2Vec2BertProcessor(feature_extractor=feature_extractor, tokenizer=tokenizer)

# Initialize model
# NOTE(review): `ignore_mismatched_sizes=True` together with an explicit
# `vocab_size` looks like a fine-tuning setup; if the checkpoint's output head
# does not match the tokenizer size it will presumably be re-initialized —
# verify this is what you want for inference.
model = Wav2Vec2BertForCTC.from_pretrained(
    PRETRAINED_MODEL,
    attention_dropout=0.0,
    hidden_dropout=0.0,
    feat_proj_dropout=0.0,
    mask_time_prob=0.0,
    layerdrop=0.0,
    ctc_loss_reduction="mean",
    add_adapter=True,
    pad_token_id=processor.tokenizer.pad_token_id,
    vocab_size=len(processor.tokenizer),
    ignore_mismatched_sizes=True,
)
model.config.ctc_zero_infinity = True
model.to(device)

# Perform inference
# Initialize the pipeline; the device is passed explicitly so the pipeline's
# inputs are placed on the same device as the model.
pipe = pipeline(
    task="automatic-speech-recognition",
    model=model,
    tokenizer=processor.tokenizer,
    feature_extractor=feature_extractor,
    device=device,
)

# Set this to the audio to transcribe before running; it is left empty here
# as a placeholder.
input_audio = ""

# The speech-recognition pipeline returns a dict whose transcription is
# stored under the "text" key.
print(pipe(input_audio)["text"])
- Downloads last month
- 0
Inference API (serverless) does not yet support adapter-transformers models for this pipeline type.
Model tree for blackhole33/wev2vec-commonVoice_v1
Base model
facebook/wav2vec2-base-960h