#Regular expressions -
Checking the documentation of re
import re

# Open the built-in documentation for the re module.
help(re)
# List the names the module exposes. The returned list is only echoed in an
# interactive session; wrap the call in print() when running this as a script.
dir(re)
The re.compile() function returns a regular expression object. This object represents the compiled version of the regular expression pattern.
import re

# Compile once so the resulting pattern object can be reused.
# The pattern matches one or more consecutive digits.
pattern = re.compile(r"\d+")
So, in the regular expression pattern \d{4}, \d is used to match exactly one digit, and {4} specifies that this digit should occur exactly four times in sequence. Therefore, \d{4} is a pattern that matches sequences of four consecutive digits.
import re

# Sample text to search. A plain double-quoted literal cannot span several
# source lines, so the text is built from adjacent literals inside
# parentheses, which Python joins into one single-line string.
string = (
    "The Euro STOXX 600 index, which tracks all stock markets across Europe "
    "including the FTSE, fell by 11.48% – the worst day since it launched in "
    "1998. The panic selling prompted by the coronavirus has wiped £2.7tn off "
    "the value of STOXX 600 shares since its all-time peak on 19 February."
)

# Define a raw string with a regular expression pattern; the raw prefix keeps
# the backslash in \d from being treated as a string escape.
s = r"\d{4}"
# Compile the regular expression pattern
t = re.compile(s)
# Use the compiled pattern's own findall() to collect every non-overlapping
# run of four consecutive digits in the string
result = t.findall(string)
# Print the result
print(result)
###################################################################################
>>> import re
>>> result = re.search(r"\d{3}", string)
>>> result
<re.Match object; span=(15, 18), match='600'>
So, in the regular expression pattern \d{3} used above, \d matches exactly one digit, and {3} specifies that this digit should occur exactly three times in sequence. Therefore, \d{3} matches sequences of three consecutive digits. Note that re.search() returns only the first such match in the string — here '600', with span (15, 18).
###################################################################################
Here's a brief breakdown of how the pattern works:
\w: Match a single word character (alphanumeric character or underscore).
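{3}: Match exactly three occurrences of the preceding word character.
Note that re.match() only tries the pattern at the very beginning of the string, which is why the result below is 'The'.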
>>> result = re.match(r"\w{3}", string)
>>> result
<re.Match object; span=(0, 3), match='The'>
###################################################################################
>>> result = re.findall(r"\d{3}", string)
>>> result
['600', '199', '600']
###################################################################################
>>> result = re.split(r"\s", string)
>>> result
['The', 'Euro', 'STOXX', '600', 'index,', 'which', 'tracks', 'all', 'stock', 'markets', 'across', 'Europe', 'including', 'the', '', 'FTSE,', 'fell', 'by', '11.48%', '–', 'the', 'worst', 'day', 'since', 'it', 'launched', 'in', '1998.', 'The', 'panic', 'selling', 'prompted', 'by', 'the', 'coronavirus', 'has', 'wiped', '£2.7tn', 'off', 'the', 'value', 'of', 'STOXX', '600', 'shares', 'since', 'its', 'all-time', 'peak', 'on', '19', 'February.']
###################################################################################
>>> result = re.sub(r"[A-Z]{2,}", "INDEX",string)
>>> result
'The Euro INDEX 600 index, which tracks all stock markets across Europe including the INDEX, fell by 11.48% – the worst day since it launched in 1998. The panic selling prompted by the coronavirus has wiped £2.7tn off the value of INDEX 600 shares since its all-time peak on 19 February.'
In simpler terms, the re.sub() function is replacing any sequence of two or more consecutive uppercase letters in the original string with the word "INDEX".
###################################################################################
>>> result = re.findall(r"the", string)
>>> result
['the', 'the', 'the', 'the']
###################################################################################
# Print every run of three consecutive digits found in the sample text.
import re

string = (
    "The Euro STOXX 600 index, which tracks all stock markets across Europe "
    "including the FTSE, fell by 11.48% – the worst day since it launched in "
    "1998. The panic selling prompted by the coronavirus has wiped £2.7tn off "
    "the value of STOXX 600 shares since its all-time peak on 19 February."
)
# findall() scans left to right and returns the non-overlapping matches as a list.
result = re.compile(r"\d{3}").findall(string)
print(result)
###################################################################################
# Split the sample text on each whitespace character, giving a list of words.
import re

string = (
    "The Euro STOXX 600 index, which tracks all stock markets across Europe "
    "including the FTSE, fell by 11.48% – the worst day since it launched in "
    "1998. The panic selling prompted by the coronavirus has wiped £2.7tn off "
    "the value of STOXX 600 shares since its all-time peak on 19 February."
)
# \s matches a single whitespace character, so each one acts as a separator.
result = re.compile(r"\s").split(string)
print(result)
###################################################################################
# Replace every run of digits in the text with the placeholder 'X'.
import re

pattern = r"\d+"  # regular expression matching one or more consecutive digits
replacement = "X"
text = "There are 123 apples and 456 oranges."
# sub() returns a new string with each match swapped for the replacement.
result = re.compile(pattern).sub(replacement, text)
print(result)
# Output -- There are X apples and X oranges.
###################################################################################
No comments:
Post a Comment