Strings
A string is a sequence of characters usually represented as an array data structure
- string is generally considered as a data type
Character Set
A character set is an element of internationalization that maps and translates an alphabet; that is, the characters that are used in a particular language.
Character Encoding
Character encoding is the process of assigning numbers to graphical characters, such as the letters and symbols used in human languages
ASCII
American Standard Code for Information Interchange (ASCII)
- 7-bit (stored as 1-byte in memory)
- 0-127, total 128 chars
- A - 65 to Z - 90
- a - 97 to z - 122
- 0 - 48 to 9 - 57
- Enter - 10
- Space - 32 (13 is Carriage Return)
- ESC - 27
Unicode
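Originally designed as 16-bit (stored as 2-byte in memory); code points now range from U+0000 to U+10FFFF and are stored using an encoding such as UTF-8, UTF-16 or UTF-32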
Represented in Hexadecimal form
A - 65 to Z - 90
a - 97 to z - 122
0 - 48 to 9 - 57
Enter - 10
Space - 32
ESC - 27
Code-point
U+0639
UTF-8
System for storing and representing code-points (encoding).
Characters
- 1-byte
- A character literal must be enclosed in single quotes
char temp = 'A'; // STORED AS 65
printf("%c", temp); // A
printf("%d", temp); // 65
Strings Data
Strings: Array of Characters.
- String is an array of chars with a string delimiter at the end.
- To know the end of a string, a null char
\0
(or string delimiter or end of the string char) is used to mark the end of the string.
- A string literal is enclosed in double-quotes.
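- A string is mutable if defined as an array,
char A[] = "Hello";
and is immutable if defined as a pointer to a string literal, char *A = "Hello"; (writing through such a pointer is undefined behavior)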
// A char array with NO terminator: all 5 slots hold letters, so it is not a C string.
char temp[5] = {'A', 'B', 'C', 'D', 'E'};
printf("%c", temp[0]); // A
// Plain "%s" would keep reading past the array looking for '\0' (undefined
// behavior), so the precision caps the output at the 5 bytes that exist.
printf("%.5s", temp); // ABCDE
// A string-literal initializer appends the '\0' automatically.
char temp1[] = "ABCDE";
printf("%s", temp1); // ABCDE
printf("%zu", sizeof(temp1)); // 6 (5 letters + '\0'); sizeof yields size_t, hence %zu
// The field width limits the read to 5 chars, leaving room for '\0' in the 6-byte array.
scanf("%5s", temp1);
char temp2[5] = {'A', 'B'}; // REST WILL BE 0
char temp3[5] = {'A', 'B', 'C', 'D', '\0'};
char temp4[5] = {'A', 'B', 'C', '\0', 'D'};
printf("%s", temp3); // ABCD
printf("%s", temp4); // ABC (printing stops at the first '\0')
scanf
treats space and enter as the end of the string. To input a string with spaces use
fgets()
(you could use gets,
but it was removed in the C11 standard because it does no array bounds checking and therefore suffers from buffer overflow).
char name[5];
fgets(name, 5, stdin);
You can check the size of the string using
strlen
fgets()
keeps the \n
(new line char) at the end, which should be handled manually:
#include <string.h>
char name[5];
fgets(name, 5, stdin); // ABCDE
printf("%s", name); // ABCD
printf("%lu", strlen(name)); // 4
name[strcspn(name, "\n")] = '\0';
String Length
Number of characters in the string.
/* Measures a string by walking forward until the terminator is reached. */
int main()
{
    char S[] = "WELCOME";
    int length = 0;

    // The index at which '\0' is found equals the number of characters before it.
    while (S[length] != '\0')
        length++;

    printf("%d", length); // 7
    return 0;
}
Convert Uppercase to Lowercase:
// Lowercase S in place (S and i are declared as in the String Length example).
for (i = 0; S[i] != '\0'; i++)
{
    // Shift only uppercase letters: adding 32 to digits, spaces or letters
    // that are already lowercase would turn them into unrelated characters.
    if (S[i] >= 'A' && S[i] <= 'Z')
        S[i] = S[i] + 32; // A:65 and a:97 So, 97-65=32
}
printf("%s", S); // welcome
Toggle case:
/* Flips the case of every letter in the string; other characters are untouched. */
int main()
{
    char S[] = "25WeLComE@";
    char *p;

    for (p = S; *p != '\0'; p++)
    {
        // 'a' - 'A' is the fixed distance (32) between the two ASCII letter ranges.
        if (*p >= 'A' && *p <= 'Z')
            *p += 'a' - 'A';
        else if (*p >= 'a' && *p <= 'z')
            *p -= 'a' - 'A';
    }

    printf("%s", S); // 25wElcOMe@
    return 0;
}
Counting number of vowels and consonants:
/* Tallies vowels and consonants; characters that are not letters are ignored. */
int main()
{
    char S[] = "How are you?";
    int vowels = 0, consonants = 0;
    const char *p;

    for (p = S; *p != '\0'; p++)
    {
        switch (*p)
        {
        case 'A': case 'E': case 'I': case 'O': case 'U':
        case 'a': case 'e': case 'i': case 'o': case 'u':
            vowels++;
            break;
        default:
            // Any remaining letter is a consonant; punctuation and spaces fall through uncounted.
            if ((*p >= 'A' && *p <= 'Z') || (*p >= 'a' && *p <= 'z'))
                consonants++;
            break;
        }
    }

    printf("%d", vowels); // 5
    printf("%d", consonants); // 4
    return 0;
}
Counting number of words:
/*
 * Counts the words in a space-separated string.
 *
 * A word is counted at the moment a non-space character is met while not
 * already inside a word. Counting word starts (rather than "gaps + 1") keeps
 * the result correct for leading spaces, trailing spaces, repeated spaces
 * and the empty string (0 words).
 */
int main()
{
    char S[] = "How are you?";
    int i, count = 0;
    int in_word = 0; // 1 while the scan is inside a word

    for (i = 0; S[i] != '\0'; i++)
    {
        if (S[i] == ' ')
        {
            in_word = 0;
        }
        else if (!in_word)
        {
            in_word = 1;
            count++;
        }
    }

    printf("%d", count); // 3
    return 0;
}
Check string validity (should only contain alphabets and no special chars):
/*
 * Reports whether a string consists solely of ASCII letters.
 *
 * str: NUL-terminated string to inspect (not modified).
 * Returns 1 when every character is in A-Z or a-z (an empty string
 * therefore yields 1), and 0 as soon as any other character is found.
 */
int string_validity(char str[])
{
    const char *p = str;

    while (*p != '\0')
    {
        int is_upper = (*p >= 'A' && *p <= 'Z');
        int is_lower = (*p >= 'a' && *p <= 'z');

        if (!is_upper && !is_lower)
            return 0;
        p++;
    }
    return 1;
}
printf("%d", string_validity("Howareyou?"));
// OUTPUT:
// 0
2nd Version:
/*
 * Checks that a NUL-terminated string holds only ASCII letters.
 *
 * str: string to inspect (read only).
 * Returns 1 if every character is a-z or A-Z (also for an empty string),
 * otherwise 0.
 */
int string_validity(char *str)
{
    int k;

    for (k = 0; str[k] != '\0'; k++)
    {
        char ch = str[k];

        if (ch >= 'a' && ch <= 'z')
            continue;
        if (ch >= 'A' && ch <= 'Z')
            continue;
        return 0; // first character that is not a letter
    }
    return 1;
}
/* Prints a sample string followed by its validity flag. */
int main()
{
    char *S = "Howareyou?";
    int verdict = string_validity(S); // 0: the '?' is not a letter

    printf("%s", S);
    printf("%d", verdict);
    return 0;
}
Reversing string:
/* Builds a reversed copy of a string in a second buffer and prints both. */
int main()
{
    char S[] = "Python";
    char reverse[7];
    int len = 0, k;

    // Find the length first so the copy can read S from its last character.
    while (S[len] != '\0')
        len++;

    for (k = 0; k < len; k++)
        reverse[k] = S[len - 1 - k];
    reverse[len] = '\0'; // terminate the copy so it can be printed with %s

    printf("%s", S);
    printf("%s", reverse);
    return 0;
}
2nd version:
/* Reverses a string in place by swapping characters from both ends inward. */
int main()
{
    char S[] = "Python";
    int lo = 0, hi = 0;

    // Move hi to the last character (one before the terminator).
    while (S[hi] != '\0')
        hi++;
    hi--;

    // Swap the outermost pair and close in until the indices meet.
    while (lo < hi)
    {
        char held = S[lo];

        S[lo] = S[hi];
        S[hi] = held;
        lo++;
        hi--;
    }

    printf("%s\n", S);
    return 0;
}
Bitwise operations
- Left shift
- Bits ORing (Merging)
- Bits ANDing (Masking)
Finding duplicate chars in a string:
/*
 * Reports letters that occur more than once in a string, using one bit of
 * an unsigned mask per letter of the alphabet (bit 0 = 'a' ... bit 25 = 'z').
 *
 * Upper- and lowercase forms of a letter are treated as the same letter,
 * and characters that are not letters are skipped. Without this, an
 * uppercase letter such as 'P' would give a negative shift count
 * ('P' - 97 == -17), which is undefined behavior.
 */
int main()
{
    char B[] = "Painting";
    unsigned int H = 0; // bit k set => letter 'a' + k has been seen
    int i;

    for (i = 0; B[i] != '\0'; i++)
    {
        char ch = B[i];
        unsigned int x;

        if (ch >= 'A' && ch <= 'Z')
            ch += 32; // fold to lowercase so the bit index stays in 0..25
        if (ch < 'a' || ch > 'z')
            continue; // not a letter: no bit is reserved for it

        x = 1u << (ch - 'a'); // unsigned mask keeps the shift well defined
        if ((x & H) != 0)
        {
            printf("%c is duplicate\n", B[i]);
        }
        else
            H = x | H;
    }
    return 0;
}
Anagram
Words made up of same letters (using hashing):
/*
 * Decides whether two lowercase words are anagrams using a 26-slot tally.
 *
 * Each letter of A increments its slot and each letter of B decrements it.
 * A slot going negative means B has a letter A lacks. If no slot goes
 * negative, the words are anagrams exactly when their lengths match
 * (otherwise A has letters left over). The verdict is always printed, so a
 * successful match is reported too.
 *
 * Both words are expected to contain only 'a'..'z'; other characters would
 * index outside H.
 */
int main()
{
    char A[] = "decimal";
    char B[] = "medical";
    // char C[] = "medicah";
    int i, j, H[26] = {0};
    int is_anagram = 1;

    for (i = 0; A[i] != '\0'; i++)
        H[A[i] - 97] += 1;

    for (j = 0; B[j] != '\0'; j++)
    {
        H[B[j] - 97] -= 1;
        if (H[B[j] - 97] < 0)
        {
            is_anagram = 0;
            break;
        }
    }

    // Here i is the length of A and, when the loop was not cut short, j is
    // the length of B; a longer A would leave positive counts behind.
    if (is_anagram && i != j)
        is_anagram = 0;

    printf("%s\n", is_anagram ? "Anagrams" : "Not anagrams");
    return 0;
}
Permutations of String
- State space tree
- Back tracking
- Brute force