How do I delete everything after second occurrence of quotes using the command line?

149

Solution 1

Using awk:

awk -v RS='"' -v ORS='"' 'NR==1{print} NR==2{print; printf"\n";exit}' file

This sets the record separator to ". So, we want to print the first two records and then we are done. In more detail:

  • -v RS='"'

    This sets the input record separator to a double quote.

  • -v ORS='"'

    This sets the output record separator to a double quote.

  • NR==1{print}

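    This tells awk to print the first record (with RS set to ", that is all the text before the first double quote), followed by the output record separator.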

  • NR==2{print; printf"\n";exit}

    This tells awk to print the second record (the text between the first and second double quotes), then print a newline character, and then exit.

Using sed

sed -r 'H;1h;$!d;x; s/(([^"]*"){2}).*/\1/' file

This reads the whole file in at once. So, if the file is huge, don't use this approach. It works as follows:

  • H;1h;$!d;x

    This is a useful sed idiom: it reads the whole file in at once.

  • s/(([^"]*"){2}).*/\1/

    This looks for the second " and then deletes all text which follows the second quote.

    The regex (([^"]*"){2}) captures all text up to and including the second double quote and saves it in group 1. The regex .* captures everything that follows to the end of the file. The replacement text is group 1, \1.

Solution 2

Though many programs don't like very long lines as input, when your data is not huge you can often simplify multi-line matching by first manipulating the data to put it all on one line, doing the match, then restoring the newlines.

For example, use tr to replace newline \n by some character not in your data (I used carriage-return `\r'), use sed to change this single line, then tr the character back:

tr '\n' '\r' < file |
sed 's/\("[^"]*"\).*/\1/' |
( tr '\r' '\n';  echo ) # add a final newline

Otherwise, though you state you want sed/awk/grep, languages like perl and python use similar regular expressions as these and are good for manipulating multi-line strings. Eg perl:

perl -e '$_ = join("",<>); s/(".*?").*/$1/s; print "$_\n"; ' file

Solution 3

Using Perl:

< infile perl -0777 -pe 's/((.*?"){2}).*/$1/s' > outfile
  • -0777: slurps the whole file at once instead of one line at a time
  • -p: places a while (<>) {[...]} loop around the script and prints the processed file
  • -e: reads the script from the arguments

Perl command breakdown:

  • s: asserts to perform a substitution
  • /: starts the pattern
  • ((.*?"){2}): matches and groups, twice in a row, any character zero or more times lazily within the current file (i.e. it matches as few times as possible, stopping as soon as the following pattern starts to match) followed by a " character; together this captures everything up to and including the second "
  • .*: matches any character zero or more times greedily within the current file (i.e. it matches as many times as possible)
  • /: stops the pattern / starts the replacement string
  • $1: replaces with the first captured group
  • /: stops the replacement string / starts the modifiers
  • s: treats the whole file as a single line, allowing . to match also newlines

Solution 4

Here is a little python script:

#!/usr/bin/env python2
with open('/path/to/file.txt') as f:
    print '"'.join(f.read().split('"')[:2]) + '"'
  • f.read().split('"') will read the whole file as a string and then split it on " to get all the " separated portions

  • As we are interested in only the first two " separated portions, '"'.join(f.read().split('"')[:2]) will join the first two with "

  • Then at last we have added a " to get the desired format.

Share:
149

Related videos on Youtube

Mike
Author by

Mike

Updated on September 18, 2022

Comments

  • Mike
    Mike over 1 year

    I'm trying and failing to adapt some code that passes an int matrix into something that will pass through command line arguments. What do I need to do to pass strings over sockets? I'm using Debian.

    Attached is the original code I'm working with that just creates and sends an array of squares of the first 30 numbers and prints basic acceptance info.

    Command line inputs would look like

    Server: ./Server_code <port>

    Client: ./Client_code <address> <port> [arg1] [arg2] [arg3] [argn]

    All I want to do is send the arguments to the server to be processed.

    Client_Code

    /* Client code */
    
    #include <stdio.h>
    #include <stdlib.h>
    #include <errno.h>
    #include <string.h>
    #include <netdb.h>
    #include <sys/types.h>
    #include <netinet/in.h>
    #include <sys/socket.h>
    #include <unistd.h>
    
    #define MAXDATASIZE 100 /* max number of bytes we can get at once */
    
    #define ARRAY_SIZE 30
    
    /*
     * Send the first ARRAY_SIZE elements of myArray over socket_id.
     *
     * Each element is narrowed to a uint16_t, converted to network byte
     * order with htons(), and written as sizeof(uint16_t) raw bytes.
     *
     * socket_id: connected socket descriptor to write to.
     * myArray:   array holding at least ARRAY_SIZE ints.
     *
     * If send() fails, the error is reported with perror() and the
     * remaining elements are not sent.
     */
    void Send_Array_Data(int socket_id, int *myArray)
    {
        int i;

        for (i = 0; i < ARRAY_SIZE; i++)
        {
            uint16_t statistics = htons(myArray[i]);
            const char *cursor = (const char *)&statistics;
            size_t remaining = sizeof(statistics);

            /* send() may accept fewer bytes than requested; keep going
             * until the whole 16-bit value has been handed over. */
            while (remaining > 0)
            {
                ssize_t sent = send(socket_id, cursor, remaining, 0);
                if (sent == -1)
                {
                    perror("send");
                    return;
                }
                cursor += sent;
                remaining -= (size_t)sent;
            }
        }
    }
    
    /*
     * Client entry point.
     *
     * Expects exactly two arguments: argv[1] is the server host name and
     * argv[2] is the TCP port number. Connects to the server, sends the
     * squares of the first ARRAY_SIZE whole numbers via Send_Array_Data(),
     * then prints whatever the server sends back in a single recv().
     *
     * Returns 0 on success; exits with status 1 if argument checking, host
     * lookup, socket creation, connect or recv fails.
     */
    int main(int argc, char *argv[])
    {
        int sockfd, numbytes, i = 0;
        char buf[MAXDATASIZE];
        struct hostent *he;
        struct sockaddr_in their_addr; /* connector's address information */

        if (argc != 3)
        {
            fprintf(stderr, "usage: client_hostname port_number\n");
            exit(1);
        }

        if ((he = gethostbyname(argv[1])) == NULL)
        { /* get the host info */
            herror("gethostbyname");
            exit(1);
        }

        if ((sockfd = socket(AF_INET, SOCK_STREAM, 0)) == -1)
        {
            perror("socket");
            exit(1);
        }

        /* clear address struct */
        memset(&their_addr, 0, sizeof(their_addr));

        their_addr.sin_family = AF_INET;            /* host byte order */
        their_addr.sin_port = htons(atoi(argv[2])); /* short, network byte order */
        their_addr.sin_addr = *((struct in_addr *)he->h_addr);

        if (connect(sockfd, (struct sockaddr *)&their_addr,
                    sizeof(their_addr)) == -1)
        {
            perror("connect");
            exit(1);
        }

        /* Create an array of squares of first 30 whole numbers */
        int simpleArray[ARRAY_SIZE] = {0};
        for (i = 0; i < ARRAY_SIZE; i++)
        {
            simpleArray[i] = i * i;
        }

        Send_Array_Data(sockfd, simpleArray);

        /* Receive message back from server.
         * Ask for at most MAXDATASIZE - 1 bytes so that the terminator
         * written below always lands inside buf. */
        if ((numbytes = recv(sockfd, buf, MAXDATASIZE - 1, 0)) == -1)
        {
            perror("recv");
            exit(1);
        }

        buf[numbytes] = '\0';

        printf("Received: %s", buf);

        close(sockfd);
        return 0;
    }

    Server_Code

    /* Server Code */
    #include <arpa/inet.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <errno.h>
    #include <string.h>
    #include <sys/types.h>
    #include <netinet/in.h>
    #include <sys/socket.h>
    #include <sys/wait.h>
    #include <unistd.h>
    #include <errno.h>
    
    #define ARRAY_SIZE 30 /* Size of array to receive */
    
    #define BACKLOG 10 /* how many pending connections queue will hold */
    
    #define RETURNED_ERROR -1
    
    /*
     * Read `size` 16-bit values in network byte order from
     * socket_identifier and return them as host-order ints.
     *
     * socket_identifier: connected socket descriptor to read from.
     * size:              number of values to read.
     *
     * Returns a malloc'd array of `size` ints; the caller must free() it.
     * Terminates the process with EXIT_FAILURE if the allocation fails,
     * if recv() reports an error, or if the peer closes the connection
     * before all values have arrived.
     */
    int *Receive_Array_Int_Data(int socket_identifier, int size)
    {
        int i;
        int *results = malloc(sizeof(*results) * size);

        if (results == NULL && size > 0)
        {
            perror("malloc");
            exit(EXIT_FAILURE);
        }

        for (i = 0; i < size; i++)
        {
            uint16_t statistics;
            char *cursor = (char *)&statistics;
            size_t remaining = sizeof(statistics);

            /* A stream socket may deliver a value one byte at a time, so
             * keep reading until the whole 16-bit value is in place. */
            while (remaining > 0)
            {
                ssize_t number_of_bytes = recv(socket_identifier, cursor, remaining, 0);

                if (number_of_bytes == RETURNED_ERROR)
                {
                    perror("rec");
                    exit(EXIT_FAILURE);
                }
                if (number_of_bytes == 0)
                {
                    /* recv() returning 0 means the peer closed the
                     * connection; statistics would be left incomplete. */
                    fprintf(stderr, "rec: connection closed before all data arrived\n");
                    exit(EXIT_FAILURE);
                }
                cursor += number_of_bytes;
                remaining -= (size_t)number_of_bytes;
            }
            results[i] = ntohs(statistics);
        }
        return results;
    }
    
    /*
     * Server entry point.
     *
     * Expects exactly one argument: argv[1] is the TCP port to listen on.
     * Binds to INADDR_ANY on that port and loops forever accepting
     * connections. Each connection is served in a forked child, which
     * reads ARRAY_SIZE values with Receive_Array_Int_Data(), prints them,
     * sends a confirmation message, and exits.
     *
     * Exits with status 1 if argument checking, socket creation, bind or
     * listen fails; otherwise it does not return.
     */
    int main(int argc, char *argv[])
    {
        /* Confirmation text sent to the client; its length is taken from
         * the array itself so send() never reads past the literal. */
        static const char reply[] = "All of array data received by server\n";

        int sockfd, new_fd;            /* listen on sock_fd, new connection on new_fd */
        struct sockaddr_in my_addr;    /* my address information */
        struct sockaddr_in their_addr; /* connector's address information */
        socklen_t sin_size;
        int i = 0;

        /* Get port number for server to listen on */
        if (argc != 2)
        {
            fprintf(stderr, "usage: port_number\n");
            exit(1);
        }

        /* generate the socket */
        if ((sockfd = socket(AF_INET, SOCK_STREAM, 0)) == -1)
        {
            perror("socket");
            exit(1);
        }

        /* Enable address/port reuse, useful for server development */
        int opt_enable = 1;
        setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, &opt_enable, sizeof(opt_enable));
        setsockopt(sockfd, SOL_SOCKET, SO_REUSEPORT, &opt_enable, sizeof(opt_enable));

        /* clear address struct */
        memset(&my_addr, 0, sizeof(my_addr));

        /* generate the end point */
        my_addr.sin_family = AF_INET;            /* host byte order */
        my_addr.sin_port = htons(atoi(argv[1])); /* short, network byte order */
        my_addr.sin_addr.s_addr = INADDR_ANY;    /* auto-fill with my IP */

        /* bind the socket to the end point */
        if (bind(sockfd, (struct sockaddr *)&my_addr, sizeof(my_addr)) == -1)
        {
            perror("bind");
            exit(1);
        }

        /* start listening */
        if (listen(sockfd, BACKLOG) == -1)
        {
            perror("listen");
            exit(1);
        }

        printf("server starts listnening ...\n");

        /* repeat: accept, send, close the connection */
        /* for every accepted connection, use a separate process to serve it */
        while (1)
        { /* main accept() loop */
            pid_t child;

            sin_size = sizeof(struct sockaddr_in);
            if ((new_fd = accept(sockfd, (struct sockaddr *)&their_addr,
                                 &sin_size)) == -1)
            {
                perror("accept");
                continue;
            }
            printf("server: got connection from %s\n",
                   inet_ntoa(their_addr.sin_addr));

            child = fork();
            if (child == -1)
            {
                /* Could not create a worker; report it and drop this
                 * connection below instead of failing silently. */
                perror("fork");
            }
            else if (child == 0)
            { /* this is the child process */

                /* the child only talks to the client, not the listener */
                close(sockfd);

                /* Call method to receive array data */
                int *results = Receive_Array_Int_Data(new_fd, ARRAY_SIZE);

                /* Print out the array results sent by client */
                for (i = 0; i < ARRAY_SIZE; i++)
                {
                    printf("Value of index[%d] = %d\n", i, results[i]);
                }

                free(results);

                if (send(new_fd, reply, sizeof(reply) - 1, 0) == -1)
                    perror("send");
                close(new_fd);
                exit(0);
            }

            close(new_fd); /* parent doesn't need this */

            while (waitpid(-1, NULL, WNOHANG) > 0)
                ; /* clean up child processes */
        }
    }
    
  • Sergiy Kolodyazhnyy
    Sergiy Kolodyazhnyy almost 9 years
    Suggestion: try NR < 3. That should shorten the code.