Spaces:

ccolas
/

TastyPiano

Runtime error

TastyPiano / src /cocktails /utilities /ingredients_utilities.py

Cédric Colas

initial commit

e775f6d about 2 years ago

11.4 kB

	# This script loads the list and profiles of our ingredients selection.
	# It defines rules to recognize ingredients from the list in recipes and the function to extract that information from ingredient strings.

	import pandas as pd
	from src.cocktails.config import INGREDIENTS_LIST_PATH, COCKTAILS_CSV_DATA
	import numpy as np

	# Load the ingredient profile table; ingredient names are lower-cased once so every lookup below is case-normalised.
	ingredient_profiles = pd.read_csv(INGREDIENTS_LIST_PATH)
	ingredient_list = [ing.lower() for ing in ingredient_profiles['ingredient']]
	n_ingredients = len(ingredient_list)
	# map each ingredient name to its row position in the profile table
	ingredient2ingredient_id = dict(zip(ingredient_list, range(n_ingredients)))

	ingredients_types = sorted(set(ingredient_profiles['type']))
	# for each type, get all ingredients.
	# Names and types are paired positionally (row by row) instead of looking each name up with list.index():
	# that lookup was quadratic and gave a duplicated name the type of its first occurrence.
	ing_per_type = [[ing for ing, ing_type in zip(ingredient_list, ingredient_profiles['type']) if ing_type == wanted_type]
	                for wanted_type in ingredients_types]
	ingredients_per_type = dict(zip(ingredients_types, ing_per_type))

	# Names of four entries of ingredient_search below (presumably the carbonated ingredients); not used in the code visible here.
	bubble_ingredients = ['soda', 'ginger beer', 'tonic', 'sparkling wine']
	# Rules to recognize ingredients in recipes. Expressions are matched as substrings of the lower-cased
	# ingredient string (see find_ingredient_from_str).
	# Each key maps to a list of alternative rules with an OR relation: only one needs to be satisfied.
	# An alternative is either a bare string, or an inner [] whose elements apply with an AND relation: all need to be satisfied.
	# A leading ~ indicates that the following expression must NOT appear.
	# An expression without ~ MUST appear.
	# Commented-out entries are rules for ingredients that are currently not part of the ingredient list.
	ingredient_search = {#'salt': ['salt'],
	'lime juice': [['lime', '~soda', '~lemonade', '~cordial']],
	'lemon juice': [['lemon', '~soda', '~lemonade']],
	'angostura': [['angostura', '~orange'],
	['bitter', '~campari', '~orange', '~red', '~italian', '~fernet']],
	'orange bitters': [['orange', 'bitter', '~bittersweet']],
	'orange juice': [['orange', '~bitter', '~jam', '~marmalade', '~liqueur', '~water'],
	['orange', 'squeeze']],
	'pineapple juice': [['pineapple']],
	# 'apple juice': [['apple', 'juice', '~pine']],
	'cranberry juice': [['cranberry', 'juice']],
	'cointreau': ['cointreau', 'triple sec', 'grand marnier', 'curaçao', 'curacao'],
	'luxardo maraschino': ['luxardo', 'maraschino', 'kirsch'],
	'amaretto': ['amaretto'],
	'benedictine': ['benedictine', 'bénédictine', 'bénedictine', 'benédictine'],
	'campari': ['campari', ['italian', 'red', 'bitter'], 'aperol', 'bittersweet', 'aperitivo', 'orange-red'],
	# 'campari': ['campari', ['italian', 'red', 'bitter']],
	# 'crème de violette': [['violette', 'crème'], ['crême', 'violette'], ['liqueur', 'violette']],
	# 'aperol': ['aperol', 'bittersweet', 'aperitivo', 'orange-red'],
	'green chartreuse': ['chartreuse'],
	'black raspberry liqueur': [['cassis', 'liqueur'],
	['black raspberry', 'liqueur'],
	['raspberry', 'liqueur'],
	['strawberry', 'liqueur'],
	['blackberry', 'liqueur'],
	['violette', 'crème'], ['crême', 'violette'], ['liqueur', 'violette']],
	# 'simple syrup': [],
	# 'drambuie': ['drambuie'],
	# 'fernet branca': ['fernet', 'branca'],
	'gin': [['gin', '~sloe', '~ginger']],
	'vodka': ['vodka'],
	'cuban rum': [['rum', 'puerto rican'], ['light', 'rum'], ['white', 'rum'], ['rum', 'havana', '~7'], ['rum', 'bacardi']],
	'cognac': [['cognac', '~grand marnier', '~cointreau', '~orange']],
	# 'bourbon': [['bourbon', '~liqueur']],
	# 'tequila': ['tequila', 'pisco'],
	# 'tequila': ['tequila'],
	'scotch': ['scotch'],
	'dark rum': [['rum', 'age', '~bacardi', '~havana'],
	['rum', 'dark', '~bacardi', '~havana'],
	['rum', 'old', '~bacardi', '~havana'],
	['rum', 'old', '7'],
	['rum', 'havana', '7'],
	['havana', 'rum', 'especial']],
	'absinthe': ['absinthe'],
	'rye whiskey': ['rye', ['bourbon', '~liqueur']],
	# 'rye whiskey': ['rye'],
	'apricot brandy': [['apricot', 'brandy']],
	# 'pisco': ['pisco'],
	# 'cachaça': ['cachaça', 'cachaca'],
	'egg': [['egg', 'white', '~yolk', '~whole']],
	'soda': [['soda', 'water', '~lemon', '~lime']],
	'mint': ['mint'],
	'sparkling wine': ['sparkling wine', 'prosecco', 'champagne'],
	'ginger beer': [['ginger', 'beer'], ['ginger', 'ale']],
	'tonic': [['tonic'], ['7up'], ['sprite']],
	# 'espresso': ['espresso', 'expresso', ['café', '~liqueur', '~cream'],
	# ['cafe', '~liqueur', '~cream'],
	# ['coffee', '~liqueur', '~cream']],
	# 'southern comfort': ['southern comfort'],
	# 'cola': ['cola', 'coke', 'pepsi'],
	'double syrup': [['sugar','~raspberry'], ['simple', 'syrup'], ['double', 'syrup']],
	# 'grenadine': ['grenadine', ['pomegranate', 'syrup']],
	'grenadine': ['grenadine', ['pomegranate', 'syrup'], ['raspberry', 'syrup', '~black']],
	'honey syrup': ['honey', ['maple', 'syrup']],
	# 'raspberry syrup': [['raspberry', 'syrup', '~black']],
	'dry vermouth': [['vermouth', 'dry'], ['vermouth', 'white'], ['vermouth', 'french'], 'lillet'],
	'sweet vermouth': [['vermouth', 'sweet'], ['vermouth', 'red'], ['vermouth', 'italian']],
	# 'lillet blanc': ['lillet'],
	'water': [['water', '~sugar', '~coconut', '~soda', '~tonic', '~honey', '~orange', '~melon']]
	}
	# Check that the rule table and the ingredient list cover exactly the same names
	# (a rule for every listed ingredient, and no rule for an unlisted one).
	# NOTE(review): this runs at import time and, being an assert, is skipped when Python runs with -O.
	assert sorted(ingredient_list) == sorted(ingredient_search.keys()), 'ing search dict keys do not match ingredient list'

	def get_ingredients_info():
	    """Summarise the ingredients used across the cocktail dataset.

	    Reads the CSV at COCKTAILS_CSV_DATA and returns a 3-tuple:
	    the largest number of ingredients in one recipe, the sorted list of
	    all ingredients that occur, and the positions in that list of the
	    alcoholic ones (liquors, liqueurs, vermouths and 'sparkling wine').
	    """
	    cocktails = pd.read_csv(COCKTAILS_CSV_DATA)
	    max_ingredients, all_ingredients, liquors, liqueurs, vermouths = get_max_n_ingredients(cocktails)
	    sorted_ingredients = sorted(all_ingredients)
	    # sparkling wine is counted as alcohol although it is not one of the three typed sets
	    alcoholic = set().union(liquors, liqueurs, vermouths, ['sparkling wine'])
	    ind_alcohol = [position for position, name in enumerate(sorted_ingredients) if name in alcoholic]
	    return max_ingredients, sorted_ingredients, ind_alcohol

	def get_max_n_ingredients(data):
	    """Scan every recipe in *data* and collect ingredient statistics.

	    *data* must provide 'names' and 'ingredients_str' columns; each
	    'ingredients_str' entry is parsed with extract_ingredients.

	    Returns a 5-tuple: the largest ingredient count of any recipe, the set
	    of all ingredients seen, and the subsets of those ingredients whose
	    type is 'liquor', 'liqueur' and 'vermouth' respectively.
	    """
	    max_count = 0
	    ingredient_set = set()
	    alcohol_set, liqueur_set, vermouth_set = set(), set(), set()
	    # ingredient type -> the set collecting ingredients of that type
	    type_buckets = (('liquor', alcohol_set), ('liqueur', liqueur_set), ('vermouth', vermouth_set))
	    recipes = np.array(data['ingredients_str'])
	    for row in range(len(data['names'])):
	        names, _ = extract_ingredients(recipes[row])
	        max_count = max(max_count, len(names))
	        for name in names:
	            ingredient_set.add(name)
	            for type_name, bucket in type_buckets:
	                if name in ingredients_per_type[type_name]:
	                    bucket.add(name)
	    return max_count, ingredient_set, alcohol_set, liqueur_set, vermouth_set

	def find_ingredient_from_str(ing_str):
	    """Assign a free-text ingredient string to one of the known ingredients.

	    The lower-cased string is tested against the rules of every entry of
	    ingredient_search (substring matching; '~' marks a forbidden
	    expression). Returns a ``(flag, ingredient)`` pair:

	    * exactly one ingredient matches -> ``(False, that ingredient)``
	    * no ingredient matches          -> ``(True, None)``
	    * several ingredients match      -> the string and all matching
	      ingredients are printed, and ``(True, first matching ingredient)``
	      is returned.

	    NOTE(review): the flag is True whenever there is no unique match; the
	    previous comment described the opposite polarity - confirm against the
	    callers which meaning they rely on.

	    Raises ValueError if a rule is neither a string nor a list.
	    """
	    ing_str = ing_str.lower()
	    def _term_ok(term):
	        # '~xyz' requires 'xyz' to be absent; any other term must be present
	        if term[0] == '~':
	            return term[1:] not in ing_str
	        return term in ing_str
	    flags = []
	    for name in ingredient_list:
	        alternatives = []  # one verdict per OR-alternative of this ingredient
	        for rule in ingredient_search[name]:
	            if isinstance(rule, str):
	                alternatives.append(_term_ok(rule))
	            elif isinstance(rule, list):
	                # every term is evaluated (no short-circuit) and all of them must hold
	                alternatives.append(all([_term_ok(term) for term in rule]))
	            else:
	                raise ValueError
	        flags.append(any(alternatives))
	    n_matches = sum(flags)
	    if n_matches == 0:
	        # if 'grape' not in ing_str:
	        # print('\t\t Not found:', ing_str)
	        return True, None
	    first_match = ingredient_list[flags.index(True)]
	    if n_matches == 1:
	        return False, first_match
	    # ambiguous string: report it together with every candidate ingredient
	    print(ing_str)
	    for name, matched in zip(ingredient_list, flags):
	        if matched:
	            print(name)
	    return True, first_match

	def get_cocktails_per_ingredient(ing_strs):
	    """Index cocktails by ingredient.

	    *ing_strs* is an iterable of formatted ingredient strings, one per
	    cocktail. Returns a dict mapping every name in ingredient_list to the
	    list of positions (in *ing_strs*) of the cocktails that contain it.
	    """
	    cocktails_per_ing = {name: [] for name in ingredient_list}
	    for cocktail_idx, recipe in enumerate(ing_strs):
	        for name in extract_ingredients(recipe)[0]:
	            cocktails_per_ing[name].append(cocktail_idx)
	    return cocktails_per_ing

	def extract_ingredients(ingredient_str):
	    """Parse a formatted ingredient string (reverse of format_ingredients).

	    The expected shape is ``'[(name,quantity),(name,quantity),...]'``.
	    Returns ``(ingredients, quantities)``: the list of names and the list
	    of quantities converted to float.
	    """
	    # drop the enclosing brackets, then split into alternating '(name' / 'quantity)' tokens
	    tokens = ingredient_str[1:-1].split(',')
	    name_tokens = tokens[0::2]
	    quantity_tokens = tokens[1::2]
	    ingredients = []
	    quantities = []
	    # zip stops at the shorter list, so an unpaired trailing token is ignored
	    for name_token, quantity_token in zip(name_tokens, quantity_tokens):
	        ingredients.append(name_token[1:])  # strip the leading '('
	        quantities.append(float(quantity_token[:-1]))  # strip the trailing ')'
	    return ingredients, quantities

	def format_ingredients(ingredients, quantities):
	    """Build a formatted ingredient string (reverse of extract_ingredients).

	    *ingredients* and *quantities* are paired positionally and rendered as
	    ``'[(name,quantity),(name,quantity),...]'``. A single trailing space is
	    removed from each ingredient name. Empty inputs give ``'[]'``.
	    """
	    pairs = []
	    for ing, q in zip(ingredients, quantities):
	        # endswith() also copes with an empty name, where ing[-1] would raise IndexError
	        ingre = ing[:-1] if ing.endswith(' ') else ing
	        pairs.append(f'({ingre},{q})')
	    # join() puts commas only between pairs, so an empty list keeps its opening bracket
	    return '[' + ','.join(pairs) + ']'


	def get_ingredient_count(data):
	    """Count how many kept recipes use each ingredient.

	    *data* must provide 'names', 'to_keep' and 'ingredients_str' columns.
	    Only rows whose 'to_keep' value is truthy are counted. Returns a dict
	    mapping every name in ingredient_list to its number of occurrences.
	    """
	    ingredient_counts = {name: 0 for name in ingredient_list}
	    for row in range(len(data['names'])):
	        if not data['to_keep'][row]:
	            continue
	        names, _ = extract_ingredients(data['ingredients_str'][row])
	        for name in names:
	            ingredient_counts[name] += 1
	    return ingredient_counts

	def add_counts_to_ingredient_list(data):
	    """Store each ingredient's occurrence count in the ingredient profiles.

	    Computes the counts with get_ingredient_count, writes them to the
	    'counts' column of the module-level ingredient_profiles table, and
	    saves that table back to INGREDIENTS_LIST_PATH (overwriting the file).
	    """
	    per_ingredient = get_ingredient_count(data)
	    # keep the counts in the same order as the rows of the profile table
	    ingredient_profiles['counts'] = [per_ingredient[name] for name in ingredient_list]
	    ingredient_profiles.to_csv(INGREDIENTS_LIST_PATH, index=False)