Linux internals and network programming

hvn-network

Monday, August 22, 2016

Sorting and Searching Algorithms

created by Frank

1. Sorting

in this section, we will compare the performance of basic sorting algorithms: bubble sort, selection sort, insertion sort, quick sort, and heap sort.

in these algorithms, quick sort and heap sort are fastest but hardest to implement. (quick sort is usually fastest)
bubble sort is simplest but the its performance is lowest

about the complexity of algorithms:

bubble sort: O(n2)
insertion sort: O(n2)
selection sort: O(n2)
quick sort: average is O(nlogn), worst case is O(n2)
heap sort: O(nlogn)

because these algorithms are common, we do not mention their principle here

below is source code of sort algorithms

1.1 bubble sort

download source code of bubble sort

#include <stdio.h>
#include <stdlib.h>
#include <time.h>

#define NUM 100000

int main()
{
  int arr[NUM];
  int i, j, k;
  int temp;
  double avg = 0;
  FILE *f;

  for (k = 0; k < 6; ++k)
  {
    char c = k + 48;
    char name[]={c};
    f = fopen(name, "r");
    printf("%s%s%s\n","reading file ", name,"..." );
    for (i = 0; i < NUM; ++i)
    {
      fscanf(f, "%d", &arr[i]);
    }
    fclose(f);

    printf("%s\n", "sorting ...");
    clock_t start = clock();
    for(i = (NUM - 1); i >= 0; i--)
    {
      for(j = 1; j <= i; j++)
      {
        if(arr[j-1] > arr[j])
        {
          temp = arr[j-1];
          arr[j-1] = arr[j];
          arr[j] = temp;
        } 
      } 
    }
    clock_t end = clock();
    printf("%s\n", "sorted...");
    double t = (double)(end - start)/CLOCKS_PER_SEC;
    avg += t;
    printf("time = %f\n\n",t);
  }
  printf("sum of time = %f\n",avg);
  printf("average time = %f\n", avg/6);
  // for(i = 0; i < NUM; i++)
  //   printf(" %d", arr[i]);

  // printf("\n");
  return 0;
}

1.2 insertion sort

download source code of insertion sort

#include 
#include 
#include 

#define NUM 100000

int main()
{
  int i, j, k;
  int arr[NUM];
  double avg = 0;
  FILE *f;

  for (k = 0; k < 6; ++k)
  {
    char c = k + 48;
    char name[]={c};
    f = fopen(name, "r");
    printf("%s%s%s\n","reading file ", name,"..." );
    for (i = 0; i < NUM; ++i)
    {
      fscanf(f, "%d", &arr[i]);
    }
    fclose(f);

    printf("%s\n", "sorting ...");
    clock_t start = clock();
  
    for (i=0; i < NUM; i++)
    {
          int index = arr[i]; 
          int j = i;
          while (j > 0 && arr[j-1] > index)
          {
             arr[j] = arr[j-1];
             j--;
          }
          arr[j] = index;
    } 
    clock_t end = clock();
    printf("%s\n", "sorted...");
    double t = (double)(end - start)/CLOCKS_PER_SEC;
    avg += t;
    printf("time = %f\n\n",t);
  }
  printf("sum of time = %f\n",avg);
  printf("average time = %f\n", avg/6);
  
  return 0;
}

1.3 selection sort

download source code of selection sort

#include 
#include 
#include 

#define NUM 100000

int main()
{
  int i, j, k;
  int arr[NUM];
  double avg = 0;
  FILE *f;

  for (k = 0; k < 6; ++k)
  {
    char c = k + 48;
    char name[]={c};
    f = fopen(name, "r");
    printf("%s%s%s\n","reading file ", name,"..." );
    for (i = 0; i < NUM; ++i)
    {
      fscanf(f, "%d", &arr[i]);
    }
    fclose(f);

    printf("%s\n", "sorting ...");
    clock_t start = clock();
    for (i = 0; i < NUM-1; i++)
    {
        int min = i;
        for (j = i+1; j < NUM; j++)
            if (arr[j] < arr[min]) min = j;
        int temp = arr[i];
        arr[i] = arr[min];
        arr[min] = temp;
    } 
    clock_t end = clock();
    printf("%s\n", "sorted...");
    double t = (double)(end - start)/CLOCKS_PER_SEC;
    avg += t;
    printf("time = %f\n\n",t);
  }

  printf("sum of time = %f\n",avg);
  printf("average time = %f\n", avg/6);
 
 return 0;
}

1.4 quick sort

download source code of quick sort

#include 
#include 
#include 

#define NUM 100000

void quickSort( int[], int, int);
int partition( int[], int, int);

void main() 
{
 //int a[] = {7, 12, 1, -2, 0, 15, 4, 11, 9};
  int a[NUM];
 int i, k;
  long double avg = 0;
  FILE *f;

  for (k = 0; k < 6; ++k)
  {
    char c = k + 48;
    char name[]={c};
    f = fopen(name, "r");
    printf("%s%s%s\n","reading file ", name,"..." );
    for (i = 0; i < NUM; ++i)
    {
      fscanf(f, "%d", &a[i]);
    }
    fclose(f);

    printf("%s\n", "sorting...");
    clock_t begin = clock();
    quickSort(a, 0, NUM);
    clock_t end = clock();
    printf("%s\n", "sorted!!!");

    long double t = (long double)(end - begin) / CLOCKS_PER_SEC;
    printf("time = %Lf\n\n", t);
    avg += t;
  }

 printf("sum of time = %Lf\n",avg);
  printf("average time = %Lf\n", avg/6);
 
  

 // printf("\n\nSorted array is:  ");
 // for(i = 0; i < NUM; ++i)
 //  printf(" %d ", a[i]);

}

void quickSort( int a[], int l, int r)
{
   int j;

   if( l < r ) 
   {
       j = partition( a, l, r);
       quickSort( a, l, j-1);
       quickSort( a, j+1, r);
   }
 
}

int partition( int a[], int l, int r) {
   int pivot, i, j, t;
   pivot = a[l];
   i = l; j = r+1;
  
   while( 1)
   {
    do ++i; while( a[i] <= pivot && i <= r );
    do --j; while( a[j] > pivot );
    if( i >= j ) break;
    t = a[i]; a[i] = a[j]; a[j] = t;
   }
   t = a[l]; a[l] = a[j]; a[j] = t;
   return j;
}

1.5 heap sort

download source code of quick sort

#include 
#include 
#include 

#define NUM 100000
 
// To heapify a subtree rooted with node i which is
// an index in arr[]. n is size of heap

void heapify(int arr[], int n, int i)
{
    int largest = i;  // Initialize largest as root
    int temp;
    int l = 2*i + 1;  // left = 2*i + 1
    int r = 2*i + 2;  // right = 2*i + 2
 
    // If left child is larger than root
    if (l < n && arr[l] > arr[largest])
        largest = l;
 
    // If right child is larger than largest so far
    if (r < n && arr[r] > arr[largest])
        largest = r;
 
    // If largest is not root
    if (largest != i)
    {
        //Swap(&arr[i], &arr[largest]);
        temp = arr[i];
        arr[i] = arr[largest];
        arr[largest] =temp;
 
        // Recursively heapify the affected sub-tree
        heapify(arr, n, largest);
    }
}
 
// main function to do heap sort
void heapSort(int arr[], int n)
{
    // Build heap (rearrange array)
    int i;
    int temp;
    for (i = n / 2 - 1; i >= 0; i--)
        heapify(arr, n, i);
 
    // One by one extract an element from heap
    for (i=n-1; i>=0; i--)
    {
        // Move current root to end
        //Swap(&arr[0], &arr[i]);
        temp = arr[0];
        arr[0] = arr[i];
        arr[i] =temp;
 
        // call max heapify on the reduced heap
        heapify(arr, i, 0);
    }
}
 
// Driver program
int main()
{
    //int arr[] = {12, 11, 13, 5, 6, 7, 1, 2, 5, 7, 9, 10};
    int a[NUM];
    int i, k;
    long double avg = 0;
    FILE *f;


     for (k = 0; k < 6; ++k)
    {
      char c = k + 48;
      char name[]={c};
      f = fopen(name, "r");
      printf("%s%s%s\n","reading file ", name,"..." );
      for (i = 0; i < NUM; ++i)
      {
        fscanf(f, "%d", &a[i]);
      }
      fclose(f);

      printf("%s\n", "sorting ...");
      clock_t begin = clock();
      heapSort(a, NUM);
      clock_t end = clock();
      printf("%s\n", "sorted...");

      long double t = (long double)(end - begin) / CLOCKS_PER_SEC;
      printf("time = %Lf\n\n", t);
      avg += t;
    }
    
    printf("sum of time = %Lf\n",avg);
    printf("average time = %Lf\n", avg/6);
    
}

for simple, we use rand() function to generate input data of programs. the number of test case is 5 and the result is average of them.

download input data

The system for implementation:

Intel core I7-6820HQ CPU 2.7 GHz
OS ubuntu 12.04 64bit
Compiler: gcc ubuntu/linaro 4.6.3

See run time of these algorithms, we can see the dramatic difference among algorithms

	bubble	Selection	Insertion	Quick	Heap
Test case 0	30.30	9.38	6.01	0.02	0.03
Test case 1	30.39	9.39	5.99	0.01	0.03
Test case 2	30.41	9.48	5.98	0.01	0.02
Test case 3	30.38	9.43	5.95	0.01	0.03
Test case 4	30.74	9.73	5.98	0.01	0.03
Test case 5	30.38	9.55	6.08	0.01	0.03
Average	30.4333	9.4933	5.9983	0.0133	0.0283

download input data here

2. Searching

in this section, we will review 2 common searching: sequence searching and binary searching.

sequence searching:

References

https://vi.wikipedia.org/wiki/Thu%E1%BA%ADt_to%C3%A1n_s%E1%BA%AFp_x%E1%BA%BFp

http://quiz.geeksforgeeks.org/heap-sort/

Sunday, August 21, 2016

Hash function

Problem discription.

Example: We have 100 different strings. We store them in an array (each element of the array represents a string). When we want to search a string X, the simplest way is consider each element of the array then compare with X. However, this procedure is not effective in processing time, especially, when the number of the array elements is huge. To solve this problem, a concept hash map is introduced.

Definition.

A hash map is a data structure used to implement an associative array, a structure that can map keys and values. A hash map uses a hash function to compute an index into an array of buckets, from which the desired value can be found.

Fig. 1. An example of hash map

In Fig. 1, a hash function transforms key (name) into index of bucket (contains telephone number).

Basic operations

Insert an entry

Search an entry

Delete an entry

Advantages

The main advantage of ash tables over the table data structures is speed. This advantage is more apparent when the number of entries is large.

Disadvantages

The cost of a good hash function can be significantly higher than inner loop of the lookup algorithm for a sequential list or search tree. So hash maps are not effective when the number of entries is very small.

A simple example

We need to create contact list (hash table) with name (key) and telephone number (value).

First, the hash table is empty, we need to import contacts to hash table. The role of the hash function is mapping the name with the index of the hash table. For instance, the hash function returns the value that based on the first letter in the name. (Alexander -> 0, Bob -> 1, Dennis -> 3). Then we set the slot of index (0, 1, 3) with the respectively phone number.

After creating (by insertion) the hash table, we can search for the phone number with a given key. For example, we want to search phone number of Alexander, hash function returns 0. We can know exactly the phone number is corresponding to index 0.

Choosing a good hash function

A good hash function and implementation algorithm are essential for good hash table performance, but may be difficult to achieve.

A basic requirement is that the function should avoid collisions. A collision means there are more than one key have the same bucket index (Fig 2)

Fig. 2. Collision in a Hash map

A basic requirement is that the function should provide a uniform distribution af hash values. A non-uniform distribution increases the number of collisions and the cost of resolving them. Uniformity is sometimes difficult to ensure by design, but may be evaluated empirically using statistical tests.

Collision resolutions.

Hash collision are practically unavoidable when hashing a random subset of a large set of possible keys.

Separate chaining.

In the separate chaining method, each bucket has some sort of list of entries with the same index (Fig. 2). The time for hash table operations is the time to find the bucket plus the time for list operation.

Separate chaining with linked lists.

Separate chaining with linked lists is popular because they require only basic data structures with simple algorithms. The cost of a table operation is that of the scanning the entries of the selected bucket for the desired key. The disadvantages of separate chaining with linked lists is the overhead of the next pointer in each entry record can be significant, and the traversing a linked list has poor performance.

Separate chaining with other data structure

Instead of a list, we can use any other data structure that supports the required operations. For example, by using a self-balancing tree, the theoretical worst-case time of common hash table operations can be brought down to O(log n) rather than O(n). However, this approach is only worth when each slot has many entries.

The variant called array hash table uses a dynamic array to store all the entries that hash to the same slot.

An elaboration on this approach is using a second hash map for each bucket. This variant guarantees constant worst-case lookup time.

Open addressing.

In open addressing strategy, all entry records are stored in the bucket array itself. With this method, a hash collision is resolved by probing through alternate locations in the array until either the target record is found or an unused array slot is found, which indicates that there is no such key in the table.

Fig. 3. An example of open addressing technique

Implement a simple hash map in C programming language.

A hash map will be the collection of entries.

#include <stdio.h>
#include <string.h>
#include <stdlib.h>

#define CHAR_SIZE 100
#define SIZE      100

struct entry_s {
 char *key;
 char *value;
 struct entry_s *next;
};

typedef struct entry_s entry_t;

struct hash_table_s {
 int size;
 struct entry_s **table;
};

typedef struct hash_table_s hash_table_t;

/* allocate memory for hash table */
hash_table_t *create_ht(int size) {
 hash_table_t *hash_table = NULL;
 if (size < 1) return NULL;

 /* Allocate the table itself */
 if ((hash_table = malloc(sizeof(hash_table_t))) == NULL) {
  return NULL;
 }

 /* Allocate pointers to the head nodes */
 if ((hash_table-> table = malloc(sizeof(entry_t *)*size)) == NULL) {
  return NULL;
 }

 int i;
 for (i = 0; i < size; i++) {
  hash_table->table[i] = NULL;
 }

 hash_table -> size = size;
 return hash_table;
}

/* Hash a string for a particular hash table */
int hash(hash_table_t *hash_table, char *key) {
 unsigned long int hash_val = 0;
 int i = 0;
 for (i = 0; i < strlen(key); i++) {
  hash_val += key[i];
 }
 return hash_val % hash_table->size;
}

/* create a new entry for a given key and value */
entry_t *create_entry(char *key, char *value) {
 entry_t *new_entry;
 if ((new_entry = malloc(sizeof(entry_t))) == NULL) {
  return NULL;
 }
 if ((new_entry -> key = malloc(CHAR_SIZE)) == NULL) {
  return NULL;
 }
 if ((new_entry -> value = malloc(CHAR_SIZE)) == NULL) {
  return NULL;
 }
 strcpy(new_entry -> key, key);
 strcpy(new_entry -> value, value);

 new_entry -> next = NULL;
 return new_entry;
}

/* Insert a entry into a hash table */
void put_entry( hash_table_t *hash_table, char *key, char *value) {
 int index;
 entry_t *new_entry;

 new_entry = create_entry(key, value);
 index = hash(hash_table, key);
 //insert entry
 if (hash_table -> table[index] == NULL) {
  hash_table -> table[index] = new_entry;
 } else {
  entry_t *temp_entry = hash_table ->table[index] -> next;
  new_entry -> next = temp_entry;
  hash_table -> table[index] = new_entry;
 }
}

/* get value from a key */
char *get_value(hash_table_t *hash_table, char *key) {
 int index;
 entry_t *entry;
 index = hash(hash_table, key);
 entry = hash_table->table[index];
 while (entry != NULL) {
  if (strcmp(entry->key, key) == 0) {
   return entry->value;
  }
  entry = entry->next;
 }
 return NULL;
}

int main() {
 hash_table_t *hash_table = create_ht(SIZE);
 put_entry(hash_table, "key1", "apple");
 put_entry(hash_table, "key2", "orange");
 put_entry(hash_table, "key3", "tomato");

 printf("%s \n", get_value(hash_table, "key1"));
 printf("%s \n", get_value(hash_table, "key2"));
 printf("%s \n", get_value(hash_table, "key3"));
 return 0;
}

Download full code here

References

https://en.wikipedia.org/wiki/Hash_table

https://www.youtube.com/watch?v=tjtFkT97Xmc

https://en.wikipedia.org/wiki/Hash_function

Preprocessor

The C Preprocessor, often known as CPP, is a macro processor that is used automatically by the C compiler. CPP is a separate step (the first step) in the compilation process. In simple terms, a C Preprocessor is just a text substitution tool.

1. Overview

1.1. Character sets

The files input to CPP might be in any character set at all. CPP’s very first action, before it even looks for line boundaries, is to convert the file into the character set it uses for internal processing. That set is what the C standard calls the source character set. It must be isomorphic with Unicode. CPP uses the UTF-8 encoding of Unicode. If you request textual output from the preprocessor with the ‘-E’ option, it will be in UTF-8.

1.2. Initial processing

Step 1. The input file is read into memory and broken into lines.

Different systems use different conventions to indicate the end of a line. GCC accepts the ASCII control sequences LF, CR LF and CR as end-of-line markers.

Step 2. Trigraph Replacement

If trigraphs are enabled, they are replaced by their corresponding single characters. By default GCC ignores trigraphs, but if you request a strictly conforming mode with the ‘-std’ option, or you specify the ‘-trigraphs’ option, then it converts them.These are nine three-character sequences, all starting with ‘??’, that are defined by ISO C to stand for single characters.

Trigraph
??( ??) ??< ??> ??= ??/ ??’ ??! ??-

Replacement
[ ] { } # \ ^ | ~

Trigraphs and digraphs were created for programmers that didn't have a keyboard which supported the ISO 646 character set.

Step 3. Continued lines are merged into one long line.

A continued line is a line which ends with a backslash, ‘\’. The backslash is removed and the following line is joined with the current one. No space is inserted, so you may split a line anywhere, even in the middle of a word.If there is white space between a backslash and the end of a line, that is still a continued line. However, as this is usually the result of an editing mistake, and many compilers will not accept it as a continued line, GCC will warn you about it.

Step 4. All comments are replaced with single spaces

/\
*
*/ # /*
*/ defi\
ne NAM\
E Arno\
ld

is equivalent to #define NAME Arnold

1.3. Tokenization

After the textual transformations are finished, the input file is converted into a sequence of preprocessing tokens. These mostly correspond to the syntactic tokens used by the C compiler, but there are a few differences.Preprocessing tokens fall into five broad classes: identifiers, preprocessing numbers, string literals, punctuators, and other.

2. Macros

2.1. Predefined macros

ANSI C defines a number of macros. Although each one is available for use in programming, the predefined macros should not be directly modified.

Macro

Description

__DATE__ The current date as a character literal in "MMM DD YYYY" format.

__TIME__ The current time as a character literal in "HH:MM:SS" format.

__FILE__ This contains the current filename as a string literal.

__LINE__ This contains the current line number as a decimal constant.

__STDC__ Defined as 1 when the compiler complies with the ANSI standard.

Example:

main() {
   printf("File :%s\n", __FILE__ );
   printf("Date :%s\n", __DATE__ );
   printf("Time :%s\n", __TIME__ );
   printf("Line :%d\n", __LINE__ );
   printf("ANSI :%d\n", __STDC__ );
}

2.2. Object-like macros

An object-like macro is defined with a line of the form:

#define identifier token-sequence

where identifier will be replaced with token-sequence wherever identifier appears in regular text.Example:

#define BUFFER_SIZE 1024
#define NUMBERS 1,2,3
char *buf = (char *) malloc (BUFFER_SIZE);
int x[] = {NUMBERS};

2.3. Function-like macros

A function-like macro is defined with a line of the form:

#define identifier(identifier-list) token-sequence

Where the macro parameters are contained in the comma-separated identifier-list. The token-sequence following the identifier list determines the behavior of the macro, and is referred to as the replacement list. There can be no space between the identifier and the ``('' character. For example:

#define PRINT_INT(x) printf("x = %d\n", x)

=> PRINT_INT (2);<=> printf("x = %d\n", 2);

2.4. Preprocessor operators

- The Macro continuation Operator(\):

The macro continuation operator (\) is used to continue a macro that is too long for a single line. For example:

#define NUMBERS 1,\
                2,\
                3
int x[] = {NUMBERS};

<=> x[] = {1, 2, 3};

- The Stringification Operator (#):

The Stringification or number-sign operator ( '#' ), when used within a macro definition, converts a macro parameter into a string constant. This operator may be used only in a macro having a specified argument or parameter list. For example:

#define PRINT_INT(x) printf(#x " = %d\n", x)

=> PRINT_INT (2);

<=> <->printf("2 = %d\n", 2);

- The Concatenation Operator (##):The Concatenation operator (##) within a macro definition combines two arguments. It permits two separate tokens in the macro definition to be joined into a single token. For example:

#define MAX(type, x, y)\
type type##_max(type x, type y) \
{\
 return (x > y ? x : y);\
}
MAX (float, x, y)

<=>

float float_max(float x, float y)
{
 return (x > y ? x : y);
}

- The Defined() Operator:

The preprocessor defined operator is used in constant expressions to determine if an identifier is defined using #define. If the specified identifier is defined, the value is true (non-zero). If the symbol is not defined, the value is false (zero). The defined operator is specified as follows:

#if !defined (MAX)
        #define MAX 1000
#endif

2.5. Variadic macros

A macro can be declared to accept a variable number of arguments much as a function can. The syntax for defining the macro is similar to that of a function. Here is an example:

#define TRACE_LOG(fmt, args...) fprintf(stdout, fmt, ##args);

In main function:

TRACE_LOG("Array: ");
for (i = 0; i < 3; i ++) {
       TRACE_LOG ("arr[%d] = %d\t", i, arr[i]);
}
printf("\n");

3. File inclusion

- #include "filename"

This variant is used for header files of your own program. It searches for a file named file first in the directory containing the current file, then in the quote directories and then the same directories used for <file>. You can prepend directories to the list of quote directories with the ‘-iquote’ option.
- #include <filename>

This variant is used for system header files. It searches for a file named file in a standard list of system directories. You can prepend directories to this list with the ‘-I’ option

4. Conditional compilation

Directive

Description

#ifdef Returns true if this macro is defined.

#ifndef Returns true if this macro is not defined.

#if Tests if a compile time condition is true.

#else The alternative for #if.

#elif #else and #if in one statement.

#endif Ends preprocessor conditional

Other preprocessor directives: Assertions, Version control, Error generation, Pragmas.
You can find the example code at here and the lib.h library

Reference:

1. http://www.tutorialspoint.com/cprogramming/c_preprocessors.htm

2. http://osr507doc.sco.com/en/tools/clang_cpp_directives.html#clang_cpp_include

3. https://gcc.gnu.org/onlinedocs/cpp/Macros.html
4. https://en.wikipedia.org/wiki/Digraphs_and_trigraphs#C

Create static library, shared library

1. Static library

1.1. Example code files

/* mod1.c */

#include stdio.h
#include unistd.h
int test1[250000] = { 1, 2, 3 };

#ifndef VERSION
#define VERSION ""
#endif

void 
x1(void) { 
    printf("Called mod1-x1 " VERSION "\n"); 
    sleep(10);
    printf("exiting x1\n");
} 

/* mod2.c */

#include stdio.h
#include unistd.h
int test2[100000] = { 1, 2, 3 };

#ifndef VERSION
#define VERSION ""
#endif

void 
x2(void) { 
    printf("Called mod2-x2 " VERSION "\n"); 
    sleep(10);
    printf("exiting x2\n");
} 


/* mod3.c */

#include stdio.h
#include unistd.h
int test3[10000] = { 1, 2, 3 };

#ifndef VERSION
#define VERSION ""
#endif

void 
x3(void) { 
    printf("Called mod3-x3 " VERSION "\n"); 
    sleep(10);
    printf("exiting x3\n");
} 


/*#* prog.c */

#include stdio.h 
#include stdlib.h

void x1(void);
void x2(void);

int
main(int argc, char *argv[])
{
    x1();
    x2();
    exit(EXIT_SUCCESS);
} /* main */

1.2. Simple way of building a program:

$ cc -c -g prog.c mod1.c mod2.c mod3.c
$ cc -o prog_nolib prog.o mod1.o mod2.o mod3.o

1.3. Use static library

Compile object file:

$ cc -c mod1.c mod2.c mod3.c

Create library “libtest.a” :

$ ar -cvq libtest.a mod1.c mod2.c mod3.c

List file in library:

$ ar -t libtest.a

Linking executable file with library:

$ cc –o prog prog.c libtest.a
Or
$ cc –o prog prog.c –L/path/to/library-directory –lctest

1.4. Disvantages:

Disk space is wasted wasted storing multiple copies of object modules.
Memory is wasted when several different programs using the same modules execute at the same time, because each program holds hold separate copies of the object modules in virtual memory.
Programs must be relinked in order to see changes to object modules.

2. Shared library

2.1. Concept and Advantages

Create single copy of all object modules, which is shared by all applications at run-time

Advantages:

- Programs do not need to be relinked to see changes in library code(Because object modules are not copied into executable files)
- Programs are smaller: disk space and virtual memory requirements are reduced.

2.2. Create and using a shared library

Create the object modules using -fPIC:

$ gcc -fPIC -g -c -Wall mod1.c mod2.c mod3.c

-fPIC (Position Independent Code) causes compiler to generate code that can be loaded anywhere in virtual memory at run-time.

Create the shared library:

$ gcc -shared -o libfoo.so mod1.o mod2.o mod3.o

Link program against library :

$ gcc -g -Wall -o prog prog.c libfoo.so

or:

$ gcc -g -Wall -o prog prog.c -L. –lfoo

Execute the program

$ ./prog

./prog: error in loading shared libraries: libfoo.so: cannot open shared object file: No such file or directory - We need to give the dynamic linker information on how to find our shared library at run-time

- Dynamic linker searches these directories before looking in standard library directories.

$ LD_LIBRARY_PATH=. ./prog

Called mod1-x1

exiting x1

Called mod2-x2

exiting x2

2.3. The shared library soname

In the earlier example, we embedded the actual name (the real name) of the shared library in an executable file.
It is possible to create an alias, called the soname, which will be embedded in an executable file instead of the real name.
At run-time, the dynamic linker will use the soname when searching for the library

The purpose of the soname is to provide a level of indirection: At run-time, executable can use a version of the shared library that is different (but compatible) from that against which it was linked

Specify soname when creating shared library

$ gcc -fPIC -c -Wall -g mod1.c mod2.c mod3.c
$ gcc -shared -Wl,-soname,libbar.so -o libfoo.so mod1.o mod2.o mod3.o
-Wl,-soname,libbar.so instructs linker to mark the shared library libfoo.so with the soname libbar.so.

Create executable:

$ gcc -g -Wall -o prog prog.c libfoo.so
Linker detects that libfoo.so contains the soname libbar.so and embeds the latter name inside the executable.

Run the program:

$ LD_LIBRARY_PATH=. ./prog
./prog: error in loading shared libraries: libbar.so: cannot open shared object file: No such file or directory
Dynamic linker cannot find anything named libbar.so.

Create a symbolic link from the soname to the real name of the library

$ ln -s libfoo.so libbar.so
$ LD_LIBRARY_PATH=. ./prog
Called mod1-x1
Called mod2-x2

At run-time this link can point to a version of the library which is different from the version against which linking was performed.

fig 1. Steps required in building a shared library, linking a program against it, and creating the required soname symbolic link.

fig 2. The steps that occur as the program is executed

2.4. Shared library versions and naming

If a new version of a shared library is compatible with an existing library, we can make a new minor version of the library.

If a new version of a shared library is incompatible with an existing library, we must make a new major version of the library.

2.5. Creating shared libraries using standard naming conventions

Create the shared library with real name libdemo.so.1.0.1 and soname libdemo.so.1.

$ gcc -fPIC -g -c -Wall mod1.c mod2.c mod3.c

$ gcc -shared -Wl,-soname,libdemo.so.1 -o libdemo.so.1.0.1 mod1.o mod2.o mod3.o

Create symbolic links for the soname and linker name:

$ ln -s libdemo.so.1.0.1 libdemo.so.1

$ ln -s libdemo.so.1 libdemo.so

Build executable using the linker name:

$ gcc -g -Wall -o ./prog prog.c -L. -ldemo

Run the program as usual:

$ LD_LIBRARY_PATH=. ./prog

Called mod1-x1

Called mod2-x2

2.6. Installing shared libraries

Production applications should not require the user to set LD_LIBRARY_PATH
We can install a shared library in one of the standard library directories

- /usr/lib/ - directory in which most standard library directories:
- /lib - directory containing libraries
- /usr/local/lib - non-standard or experimental libraries should be install here

After installation, symbolic links for soname and linker name must be created, as relative links in the same directory.
Example install in /usr/lib

$ mv libdemo.so.1.0.1 /usr/lib
cd /usr/lib
ln -s libdemo.so.1.0.1 libdemo.so.1
ln -s lindemo.so.1 libdemo.so

2.7. Upgrading Shared libraries

Example - upgrading a shared library with a new minor version (1.0.2)

$ gcc -fPIC -g -c -Wall mod1.c mod2.c mod3.c
$ gcc -shared -Wl,-soname,libdemo.so.1 -o libdemo.so.1.0.2 mod1.o mod2.o mod3.o
$ mv libdemo.so.1.0.2 /usr/lib
$ ldconfig -v | grep libdemo
libdemo.so.1 -> libdemo.so.1.0.2 (changed)

Assuming the linker name was already correctly set up, we do not need to modify it.
Then upgrading to a new major version (2.0.1):

$ gcc -fPIC -g -c -Wall mod1.c mod2.c mod3.c
$ gcc -shared -Wl,-soname,libdemo.so.2 -o libdemo.so.2.0.1 mod1.o mod2.o mod3.o

                $ mv libdemo.so.2.0.1 /usr/lib

                $ ldconfig -v | grep libdemo

                          libdemo.so.1 -> libdemo.so.1.0.2

                          libdemo.so.2 -> libdemo.so.2.0.1 (changed)

                $ cd /usr/lib

                $ ln -sf libdemo.so.2 libdemo.so


ldconfig automatically creates a soname symbolic link, but we must manually update the linker name symbolic link.


2.8. Dynamically Loaded Libraries
The dlopen API allows a program to :

Open a shared library at run time
search for function( or variable ) by name in lobrary
and the call the function ( or access the variable)


dlopen API:

Four key functions: dlopen(), dlerror(), dlsym(), and dlclose().




void *dlopen(const char *filename, int flag);

dlopen() opens library, and increments the library's count of open references.

If filename contains a slash (/), dlopen() interprets it as an (absolute or relative) pathname...


otherwise it employs the usual library search rules. 


The flag argument is a bit mask that must include one of the following:

              - RTLD_LAZY: symbol referenced by library should be resolved only as code is executed. 
              - RTLD_NOW: all symbols references should be resolved before dlopen() completes

dlopen() returns a handle that is used to refer to the library in later calls, or NULL if an error occurred (e.g., library couldn't be found).


const char *dlerror(void);

dlerror() returns a pointer to a message string after an error return from dlopen(), dlsym(), or dlclose().

Returns NULL if no error has occurred since last call to dlerror().




void *dlsym(void *handle, char *symbol);

dlsym() searches a library to find a symbol (e.g., a function) and return its address.

For a function, we can dereference this pointer to call the function. 


Returns NULL if symbol could not be found. 


Complication: the value of a symbol may be NULL (0), which is indistinguishable from "symbol not found". To differentiate the two possibilities, call dlerror() beforehand.


int dlclose(void *handle);

dlclose() decrements the count of open references to the library.

If reference count falls to zero, library is unloaded. 




Example- dynload.c

#define _GNU_SOURCE
/*#}*/
#include <stdio.h>
#include <stdlib.h>
#include <dlfcn.h


int
main(int argc, char *argv[])
{
    void *libHandle;  /* Handle for shared library */
    void (*funcp)(void); /* Pointer to function with no args */
    char *err;
    
    if (argc != 3) {  /* Check command line arguments */
        fprintf(stderr, "Usage: %s lib-path func-name\n", argv[0]);
        exit(EXIT_FAILURE);
    } /* if */ 

    /* Load the shared library and get a handle for later use */
    
    libHandle = dlopen(argv[1], RTLD_LAZY);
    if (libHandle == NULL) {
 fprintf(stderr, "Error on dlopen: %s\n", dlerror());
 exit(EXIT_FAILURE);
    } /* if */

    /* Get a pointer to named function inside library */
    
    (void) dlerror();    /* Clear dlerror() */
    funcp = dlsym(libHandle, argv[2]);
    err = dlerror();     
    if (err != NULL) {  /* Non-NULL from dlerror() means we got error */
 fprintf(stderr, "Error on dlsym: %s\n", err);
 exit(EXIT_FAILURE);
    } /* if */

/*#{{*/
    {
 Dl_info dli;
 dladdr(funcp, &dli);

 printf("Lib name = %s\n", dli.dli_fname);
 printf("Load address = 0x%lx\n", (long) dli.dli_fbase);
 printf("Name of nearest sym = %s\n", dli.dli_sname);
 printf("Load address = 0x%lx\n", (long) dli.dli_saddr);
    }
/*#}*/

    /* If the function address is non-NULL try calling it */

    if (funcp == NULL)
 printf("%s is NULL\n", argv[2]);
    else
        (*funcp)();

    /* And close the library */

    dlclose(libHandle);

    exit(EXIT_SUCCESS);
} /* main */

Compile and run this program:

Reference:

http://man7.org/conf/lca2006/shared_libraries/index.html

Trigraph	??(	??)	??<	??>	??=	??/	??’	??!	??-
Replacement	[	]	{	}	#	\	^	\|	~

Macro	Description
__DATE__	The current date as a character literal in "MMM DD YYYY" format.
__TIME__	The current time as a character literal in "HH:MM:SS" format.
__FILE__	This contains the current filename as a string literal.
__LINE__	This contains the current line number as a decimal constant.
__STDC__	Defined as 1 when the compiler complies with the ANSI standard.

Directive	Description
#ifdef	Returns true if this macro is defined.
#ifndef	Returns true if this macro is not defined.
#if	Tests if a compile time condition is true.
#else	The alternative for #if.
#elif	#else and #if in one statement.
#endif	Ends preprocessor conditional

My Blog List

Monday, August 22, 2016

Sorting and Searching Algorithms

1. Sorting

1.1 bubble sort

1.2 insertion sort

1.3 selection sort

1.4 quick sort

1.5 heap sort

2. Searching

References

Sunday, August 21, 2016

Hash function

Problem discription.

Definition.

Basic operations

Advantages

Disadvantages

A simple example

Choosing a good hash function

Collision resolutions.

Separate chaining.

Open addressing.

Implement a simple hash map in C programming language.

References

Preprocessor

1. Overview

1.1. Character sets

1.2. Initial processing

Step 1. The input file is read into memory and broken into lines.

Step 2. Trigraph Replacement

Step 3. Continued lines are merged into one long line.

Step 4. All comments are replaced with single spaces

1.3. Tokenization

2. Macros

2.1. Predefined macros

2.2. Object-like macros

2.3. Function-like macros

2.4. Preprocessor operators

2.5. Variadic macros

3. File inclusion

Reference:

Create static library, shared library

1. Static library

1.1. Example code files

1.2. Simple way of building a program:

1.3. Use static library

1.4. Disvantages:

2. Shared library

2.1. Concept and Advantages

2.2. Create and using a shared library

2.3. The shared library soname

2.4. Shared library versions and naming

2.5. Creating shared libraries using standard naming conventions

2.6. Installing shared libraries

2.7. Upgrading Shared libraries

2.8. Dynamically Loaded Libraries