Spaces:
Runtime error
Runtime error
# This script loads the list and the profiles of our selection of ingredients.
# It defines the rules used to recognize these ingredients in recipes, and the functions that extract that information from ingredient strings.
import pandas as pd | |
from src.cocktails.config import INGREDIENTS_LIST_PATH, COCKTAILS_CSV_DATA | |
import numpy as np | |
def _group_ingredients_by_type(names, types, type_order):
    """Return, for each type of type_order, the names having that type (in list order).

    A name that appears several times is given the type of its first row,
    which is what a lookup through ``list.index`` would yield.
    """
    first_type = {}
    for name, kind in zip(names, types):
        first_type.setdefault(name, kind)
    return [[name for name in names if first_type[name] == kind] for kind in type_order]


# Ingredient profiles table, one row per ingredient of the selection.
ingredient_profiles = pd.read_csv(INGREDIENTS_LIST_PATH)
# Lower-cased ingredient names, in the row order of the table.
ingredient_list = [ing.lower() for ing in ingredient_profiles['ingredient']]
n_ingredients = len(ingredient_list)
# Ingredient name -> position in ingredient_list.
ingredient2ingredient_id = {ing: ing_id for ing_id, ing in enumerate(ingredient_list)}
# Distinct values of the 'type' column, sorted.
ingredients_types = sorted(set(ingredient_profiles['type']))
# for each type, get all ingredients
ing_per_type = _group_ingredients_by_type(ingredient_list, ingredient_profiles['type'], ingredients_types)
ingredients_per_type = dict(zip(ingredients_types, ing_per_type))
bubble_ingredients = ['soda', 'ginger beer', 'tonic', 'sparkling wine']
# Rules to recognize the ingredients of the list in recipe ingredient strings.
# Each ingredient maps to a list of alternative rules (OR relation): one satisfied rule is enough.
# A rule is either a single expression, or a nested list of expressions that must
# all be satisfied together (AND relation).
# An expression prefixed with ~ must NOT appear in the ingredient string;
# an expression without prefix MUST appear in it.
# Entries that are commented out are rules for ingredients that are currently disabled.
ingredient_search = {
    # 'salt': ['salt'],
    'lime juice': [['lime', '~soda', '~lemonade', '~cordial']],
    'lemon juice': [['lemon', '~soda', '~lemonade']],
    'angostura': [
        ['angostura', '~orange'],
        ['bitter', '~campari', '~orange', '~red', '~italian', '~fernet'],
    ],
    'orange bitters': [['orange', 'bitter', '~bittersweet']],
    'orange juice': [
        ['orange', '~bitter', '~jam', '~marmalade', '~liqueur', '~water'],
        ['orange', 'squeeze'],
    ],
    'pineapple juice': [['pineapple']],
    # 'apple juice': [['apple', 'juice', '~pine']],
    'cranberry juice': [['cranberry', 'juice']],
    'cointreau': ['cointreau', 'triple sec', 'grand marnier', 'curaçao', 'curacao'],
    'luxardo maraschino': ['luxardo', 'maraschino', 'kirsch'],
    'amaretto': ['amaretto'],
    'benedictine': ['benedictine', 'bénédictine', 'bénedictine', 'benédictine'],
    'campari': [
        'campari',
        ['italian', 'red', 'bitter'],
        'aperol',
        'bittersweet',
        'aperitivo',
        'orange-red',
    ],
    # 'campari': ['campari', ['italian', 'red', 'bitter']],
    # 'crème de violette': [['violette', 'crème'], ['crême', 'violette'], ['liqueur', 'violette']],
    # 'aperol': ['aperol', 'bittersweet', 'aperitivo', 'orange-red'],
    'green chartreuse': ['chartreuse'],
    'black raspberry liqueur': [
        ['cassis', 'liqueur'],
        ['black raspberry', 'liqueur'],
        ['raspberry', 'liqueur'],
        ['strawberry', 'liqueur'],
        ['blackberry', 'liqueur'],
        ['violette', 'crème'],
        ['crême', 'violette'],
        ['liqueur', 'violette'],
    ],
    # 'simple syrup': [],
    # 'drambuie': ['drambuie'],
    # 'fernet branca': ['fernet', 'branca'],
    'gin': [['gin', '~sloe', '~ginger']],
    'vodka': ['vodka'],
    'cuban rum': [
        ['rum', 'puerto rican'],
        ['light', 'rum'],
        ['white', 'rum'],
        ['rum', 'havana', '~7'],
        ['rum', 'bacardi'],
    ],
    'cognac': [['cognac', '~grand marnier', '~cointreau', '~orange']],
    # 'bourbon': [['bourbon', '~liqueur']],
    # 'tequila': ['tequila', 'pisco'],
    # 'tequila': ['tequila'],
    'scotch': ['scotch'],
    'dark rum': [
        ['rum', 'age', '~bacardi', '~havana'],
        ['rum', 'dark', '~bacardi', '~havana'],
        ['rum', 'old', '~bacardi', '~havana'],
        ['rum', 'old', '7'],
        ['rum', 'havana', '7'],
        ['havana', 'rum', 'especial'],
    ],
    'absinthe': ['absinthe'],
    'rye whiskey': ['rye', ['bourbon', '~liqueur']],
    # 'rye whiskey': ['rye'],
    'apricot brandy': [['apricot', 'brandy']],
    # 'pisco': ['pisco'],
    # 'cachaça': ['cachaça', 'cachaca'],
    'egg': [['egg', 'white', '~yolk', '~whole']],
    'soda': [['soda', 'water', '~lemon', '~lime']],
    'mint': ['mint'],
    'sparkling wine': ['sparkling wine', 'prosecco', 'champagne'],
    'ginger beer': [['ginger', 'beer'], ['ginger', 'ale']],
    'tonic': [['tonic'], ['7up'], ['sprite']],
    # 'espresso': ['espresso', 'expresso', ['café', '~liqueur', '~cream'],
    #              ['cafe', '~liqueur', '~cream'],
    #              ['coffee', '~liqueur', '~cream']],
    # 'southern comfort': ['southern comfort'],
    # 'cola': ['cola', 'coke', 'pepsi'],
    'double syrup': [['sugar', '~raspberry'], ['simple', 'syrup'], ['double', 'syrup']],
    # 'grenadine': ['grenadine', ['pomegranate', 'syrup']],
    'grenadine': ['grenadine', ['pomegranate', 'syrup'], ['raspberry', 'syrup', '~black']],
    'honey syrup': ['honey', ['maple', 'syrup']],
    # 'raspberry syrup': [['raspberry', 'syrup', '~black']],
    'dry vermouth': [['vermouth', 'dry'], ['vermouth', 'white'], ['vermouth', 'french'], 'lillet'],
    'sweet vermouth': [['vermouth', 'sweet'], ['vermouth', 'red'], ['vermouth', 'italian']],
    # 'lillet blanc': ['lillet'],
    'water': [['water', '~sugar', '~coconut', '~soda', '~tonic', '~honey', '~orange', '~melon']],
}
# Sanity check at import time: the rule table and the ingredient list must cover exactly the same names.
assert sorted(ingredient_search) == sorted(ingredient_list), 'ing search dict keys do not match ingredient list'
def get_ingredients_info():
    """Summarize the ingredients used in the cocktail dataset.

    Reads the CSV at COCKTAILS_CSV_DATA and scans it with get_max_n_ingredients.

    Returns a tuple (max_ingredients, ingredient_list, ind_alcohol):
      - max_ingredients: largest number of ingredients found in one recipe,
      - ingredient_list: sorted list of the ingredients found in the dataset,
      - ind_alcohol: positions, in that sorted list, of the ingredients that are
        of type liquor, liqueur or vermouth, or that are 'sparkling wine'.
    """
    cocktails = pd.read_csv(COCKTAILS_CSV_DATA)
    max_ingredients, used, liquors, liqueurs, vermouths = get_max_n_ingredients(cocktails)
    used_sorted = sorted(used)
    # sparkling wine is added by hand on top of the three alcoholic types
    alcoholic = liquors | liqueurs | vermouths | {'sparkling wine'}
    ind_alcohol = [pos for pos, name in enumerate(used_sorted) if name in alcoholic]
    return max_ingredients, used_sorted, ind_alcohol
def get_max_n_ingredients(data):
    """Scan all recipes of data and collect statistics about their ingredients.

    data must provide a 'names' entry (only its length is used) and an
    'ingredients_str' entry holding one formatted ingredient string per recipe.

    Returns a tuple (max_count, ingredient_set, liquor_set, liqueur_set, vermouth_set):
    the largest number of ingredients in a recipe, the set of all ingredients
    met, and the subsets of those whose type is 'liquor', 'liqueur' and
    'vermouth' according to ingredients_per_type.
    """
    recipe_strs = np.array(data['ingredients_str'])
    longest = 0
    seen, liquors, liqueurs, vermouths = set(), set(), set(), set()
    for row in range(len(data['names'])):
        names, _ = extract_ingredients(recipe_strs[row])
        longest = max(longest, len(names))
        seen.update(names)
        # sort the ingredients of this recipe into the alcoholic categories
        liquors.update(name for name in names if name in ingredients_per_type['liquor'])
        liqueurs.update(name for name in names if name in ingredients_per_type['liqueur'])
        vermouths.update(name for name in names if name in ingredients_per_type['vermouth'])
    return longest, seen, liquors, liqueurs, vermouths
def find_ingredient_from_str(ing_str):
    """Assign an ingredient string to one ingredient of the list, using ingredient_search.

    The string is lower-cased, then every ingredient of ingredient_list is
    tested: an ingredient matches when at least one of its rules holds. A rule
    is either a single expression or a list of expressions that must all hold;
    an expression starting with '~' holds when the rest of it is absent from
    the string, any other expression holds when it is present in the string.

    Returns a tuple (flag, ingredient):
      - (False, name) when exactly one ingredient matches,
      - (True, None) when no ingredient matches,
      - (True, first_match) when several ingredients match; in that case the
        lower-cased string and all matching ingredients are printed.
    NOTE(review): the flag is True in the not-found and ambiguous cases, so it
    presumably means "reject this cocktail"; confirm against the callers.

    Raises ValueError when a rule is neither a string nor a list.
    """
    text = ing_str.lower()

    def holds(expression):
        # '~xyz' requires 'xyz' to be absent, a plain expression requires presence
        if expression[0] == '~':
            return expression[1:] not in text
        return expression in text

    matches = []
    for name in ingredient_list:
        alternatives = []  # one boolean per rule, combined with OR
        for rule in ingredient_search[name]:
            if isinstance(rule, str):
                alternatives.append(holds(rule))
            elif isinstance(rule, list):
                # every expression is evaluated before combining them with AND
                alternatives.append(all([holds(expression) for expression in rule]))
            else:
                raise ValueError
        if any(alternatives):
            matches.append(name)

    if len(matches) > 1:
        # ambiguous string: report it together with all the candidates
        print(text)
        for name in matches:
            print(name)
        return True, matches[0]
    if not matches:
        # if 'grape' not in ing_str:
        #     print('\t\t Not found:', ing_str)
        return True, None
    return False, matches[0]
def get_cocktails_per_ingredient(ing_strs):
    """Map each ingredient of ingredient_list to the recipes that contain it.

    ing_strs is an iterable of formatted ingredient strings, one per cocktail.
    Returns a dict: ingredient name -> list of positions (in ing_strs) of the
    cocktails whose ingredient string contains that ingredient.
    """
    cocktails_per_ing = {name: [] for name in ingredient_list}
    for cocktail_id, recipe_str in enumerate(ing_strs):
        names, _ = extract_ingredients(recipe_str)
        for name in names:
            cocktails_per_ing[name].append(cocktail_id)
    return cocktails_per_ing
def extract_ingredients(ingredient_str):
    """Parse a formatted ingredient string (reverse of format_ingredients).

    The expected layout is '[(name,quantity),(name,quantity),...]'.
    Returns a tuple (ingredients, quantities): the list of names and the list
    of quantities converted to float. A trailing field without a partner is
    ignored.
    """
    # drop the enclosing brackets, then cut on commas: fields alternate
    # between '(name' and 'quantity)'
    fields = ingredient_str[1:-1].split(',')
    ingredients, quantities = [], []
    for name_field, quantity_field in zip(fields[::2], fields[1::2]):
        ingredients.append(name_field[1:])               # strip the leading '('
        quantities.append(float(quantity_field[:-1]))    # strip the trailing ')'
    return ingredients, quantities
def format_ingredients(ingredients, quantities):
    """Build a formatted ingredient string (reverse of extract_ingredients).

    ingredients and quantities are parallel sequences; they are paired with
    zip, so extra items of the longer one are ignored. One trailing space, if
    present, is removed from each ingredient name.

    Returns a string of the form '[(name,quantity),(name,quantity)]', and
    '[]' when there is nothing to format.
    """
    entries = []
    for ing, q in zip(ingredients, quantities):
        # endswith also copes with an empty name, unlike indexing ing[-1]
        name = ing[:-1] if ing.endswith(' ') else ing
        entries.append(f'({name},{q})')
    # joining keeps the opening bracket intact even when there is no entry
    return '[' + ','.join(entries) + ']'
def get_ingredient_count(data):
    """Count, over the whole dataset, in how many kept recipes each ingredient appears.

    data must provide 'names' (only its length is used), 'to_keep' and
    'ingredients_str', all indexable by row position. Rows whose 'to_keep'
    value is falsy are skipped.

    Returns a dict: ingredient name (from ingredient_list) -> count.
    """
    tally = {name: 0 for name in ingredient_list}
    for row in range(len(data['names'])):
        if not data['to_keep'][row]:
            continue
        names, _ = extract_ingredients(data['ingredients_str'][row])
        for name in names:
            tally[name] += 1
    return tally
def add_counts_to_ingredient_list(data):
    """Store the number of occurrences of each ingredient in the ingredient profiles.

    Counts come from get_ingredient_count(data). They are written, in the order
    of ingredient_list, to the 'counts' column of ingredient_profiles, and the
    table is then saved back to INGREDIENTS_LIST_PATH (without the index).
    """
    occurrences = get_ingredient_count(data)
    ingredient_profiles['counts'] = [occurrences[name] for name in ingredient_list]
    ingredient_profiles.to_csv(INGREDIENTS_LIST_PATH, index=False)